From a74609ae72544092cda4369e9859d56bfcad10eb Mon Sep 17 00:00:00 2001
From: Yaniv Michael Kaul <yaniv.kaul@scylladb.com>
Date: Thu, 9 Apr 2026 13:43:51 +0300
Subject: [PATCH 1/6] perf: add __slots__ to Tablet to eliminate per-instance
 __dict__

Add __slots__ to the Tablet class, removing the per-instance __dict__
allocation. Tablets are created frequently (one per token range per table)
and are long-lived, so the cumulative memory savings are significant.

Before: 416 bytes/tablet (48 instance + 96 __dict__ + 80 replicas + 192 tuples)
After:  328 bytes/tablet (56 instance +  0 __dict__ + 80 replicas + 192 tuples)
Saving: 88 bytes/tablet (21%)

Scale impact (3 replicas/tablet):
  12,800 tablets (100 tables x 128): saves 1.1 MB
 128,000 tablets (1000 tables x 128): saves 10.7 MB
 256,000 tablets (1000 tables x 256): saves 21.5 MB

Tablet.from_row construction also improves:
  Before: 186 ns/call
  After:  147 ns/call (1.27x faster, -21%)
---
 cassandra/tablets.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/cassandra/tablets.py b/cassandra/tablets.py
index 96e61a50c2..4250acff4b 100644
--- a/cassandra/tablets.py
+++ b/cassandra/tablets.py
@@ -15,9 +15,7 @@ class Tablet(object):
     It stores information about each replica, its host and shard,
     and the token interval in the format (first_token, last_token].
     """
-    first_token = 0
-    last_token = 0
-    replicas = None
+    __slots__ = ('first_token', 'last_token', 'replicas')
 
     def __init__(self, first_token=0, last_token=0, replicas=None):
         self.first_token = first_token

From 06cf4de19d891ca1a83da97d536f4b7eb98688f8 Mon Sep 17 00:00:00 2001
From: Yaniv Michael Kaul <yaniv.kaul@scylladb.com>
Date: Thu, 9 Apr 2026 13:44:48 +0300
Subject: [PATCH 2/6] perf: store Tablet.replicas as tuple instead of list

Replicas are never mutated after Tablet construction; convert them to a
tuple in __init__ to save 8 bytes per tablet (list overallocates for
future appends that never happen) and to communicate immutability.

Before: 328 bytes/tablet (replicas container: 80 bytes as list)
After:  320 bytes/tablet (replicas container: 72 bytes as tuple)
Saving: 8 bytes/tablet (2.4%)

Combined with __slots__ (commit 1), total savings so far: 96 bytes/tablet.

Scale impact (3 replicas/tablet):
  128,000 tablets: saves ~1.0 MB (tuple) + 10.7 MB (slots) = 11.7 MB total
  256,000 tablets: saves ~2.0 MB (tuple) + 21.5 MB (slots) = 23.5 MB total
---
 cassandra/tablets.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cassandra/tablets.py b/cassandra/tablets.py
index 4250acff4b..2c90cf02b3 100644
--- a/cassandra/tablets.py
+++ b/cassandra/tablets.py
@@ -20,7 +20,7 @@ class Tablet(object):
     def __init__(self, first_token=0, last_token=0, replicas=None):
         self.first_token = first_token
         self.last_token = last_token
-        self.replicas = replicas
+        self.replicas = tuple(replicas) if replicas is not None else None
 
     def __str__(self):
         return "<Tablet: first_token=%s last_token=%s replicas=%s>" \

From 1d25663c0e1b8836319524eac3a4c77af6bc3e74 Mon Sep 17 00:00:00 2001
From: Yaniv Michael Kaul <yaniv.kaul@scylladb.com>
Date: Thu, 9 Apr 2026 14:17:19 +0300
Subject: [PATCH 3/6] perf: cache _replica_dict on Tablet for O(1) host/shard
 lookup

Build a {host_id: shard_id} dict once at Tablet construction time so
that policies.py and pool.py can replace set(map(lambda ...)) and
linear scans with O(1) dict operations.

- Add _replica_dict to __slots__
- Build dict from the materialized tuple (not the raw replicas arg)
  to avoid double-consuming a one-shot iterator
- Update DCAwareRoundRobinPolicy to use tablet._replica_dict keys
- Update HostConnection to use tablet._replica_dict.get() for shard
- Rewrite replica_contains_host_id() to use dict membership
- Add 7 unit tests covering dict construction, lookup, host membership,
  tuple storage, and the iterator edge case
---
 cassandra/policies.py      |  4 +--
 cassandra/pool.py          |  5 +---
 cassandra/tablets.py       | 15 ++++++----
 tests/unit/test_tablets.py | 60 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 72 insertions(+), 12 deletions(-)

diff --git a/cassandra/policies.py b/cassandra/policies.py
index ceb5ebdc45..65845e55a7 100644
--- a/cassandra/policies.py
+++ b/cassandra/policies.py
@@ -507,10 +507,10 @@ def make_query_plan(self, working_keyspace=None, query=None):
             keyspace, query.table, self._cluster_metadata.token_map.token_class.from_key(query.routing_key))
 
         if tablet is not None:
-            replicas_mapped = set(map(lambda r: r[0], tablet.replicas))
+            replica_dict = tablet._replica_dict
             child_plan = child.make_query_plan(keyspace, query)
 
-            replicas = [host for host in child_plan if host.host_id in replicas_mapped]
+            replicas = [host for host in child_plan if host.host_id in replica_dict]
         else:
             replicas = self._cluster_metadata.get_replicas(keyspace, query.routing_key)
 
diff --git a/cassandra/pool.py b/cassandra/pool.py
index 227e1b5315..5b370f36d3 100644
--- a/cassandra/pool.py
+++ b/cassandra/pool.py
@@ -462,10 +462,7 @@ def _get_connection_for_routing_key(self, routing_key=None, keyspace=None, table
                 tablet = self._session.cluster.metadata._tablets.get_tablet_for_key(keyspace, table, t)
 
                 if tablet is not None:
-                    for replica in tablet.replicas:
-                        if replica[0] == self.host.host_id:
-                            shard_id = replica[1]
-                            break
+                    shard_id = tablet._replica_dict.get(self.host.host_id)
 
             if shard_id is None:
                 shard_id = self.host.sharding_info.shard_id_from_token(t.value)
diff --git a/cassandra/tablets.py b/cassandra/tablets.py
index 2c90cf02b3..70caab2a13 100644
--- a/cassandra/tablets.py
+++ b/cassandra/tablets.py
@@ -15,12 +15,18 @@ class Tablet(object):
     It stores information about each replica, its host and shard,
     and the token interval in the format (first_token, last_token].
     """
-    __slots__ = ('first_token', 'last_token', 'replicas')
+    __slots__ = ('first_token', 'last_token', 'replicas', '_replica_dict')
 
     def __init__(self, first_token=0, last_token=0, replicas=None):
         self.first_token = first_token
         self.last_token = last_token
-        self.replicas = tuple(replicas) if replicas is not None else None
+        if replicas is not None:
+            replicas_tuple = tuple(replicas)
+            self.replicas = replicas_tuple
+            self._replica_dict = {r[0]: r[1] for r in replicas_tuple}
+        else:
+            self.replicas = None
+            self._replica_dict = {}
 
     def __str__(self):
         return "<Tablet: first_token=%s last_token=%s replicas=%s>" \
@@ -39,10 +45,7 @@ def from_row(first_token, last_token, replicas):
         return None
 
     def replica_contains_host_id(self, uuid: UUID) -> bool:
-        for replica in self.replicas:
-            if replica[0] == uuid:
-                return True
-        return False
+        return uuid in self._replica_dict
 
 
 class Tablets(object):
diff --git a/tests/unit/test_tablets.py b/tests/unit/test_tablets.py
index 7a40e7de4d..0a9cfe355a 100644
--- a/tests/unit/test_tablets.py
+++ b/tests/unit/test_tablets.py
@@ -1,4 +1,5 @@
 import unittest
+from uuid import UUID
 
 from cassandra.tablets import Tablets, Tablet
 
@@ -124,3 +125,62 @@ def __init__(self, v):
         # Token value 50 is not > first_token (100) of the tablet whose
         # last_token (200) is >= 50, so no match.
         self.assertIsNone(tablets.get_tablet_for_key("ks", "tb", Token(50)))
+
+
+class TabletReplicaDictTest(unittest.TestCase):
+    """Tests for Tablet._replica_dict cached lookup."""
+
+    def test_replica_dict_built_from_replicas(self):
+        u1 = UUID('12345678-1234-5678-1234-567812345678')
+        u2 = UUID('87654321-4321-8765-4321-876543218765')
+        t = Tablet(0, 100, [(u1, 3), (u2, 7)])
+        self.assertEqual(t._replica_dict, {u1: 3, u2: 7})
+
+    def test_replica_dict_empty_when_no_replicas(self):
+        t = Tablet(0, 100, None)
+        self.assertEqual(t._replica_dict, {})
+
+    def test_replica_dict_contains_host(self):
+        u1 = UUID('12345678-1234-5678-1234-567812345678')
+        u2 = UUID('87654321-4321-8765-4321-876543218765')
+        u3 = UUID('aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee')
+        t = Tablet(0, 100, [(u1, 3), (u2, 7)])
+        self.assertIn(u1, t._replica_dict)
+        self.assertIn(u2, t._replica_dict)
+        self.assertNotIn(u3, t._replica_dict)
+
+    def test_replica_dict_shard_lookup(self):
+        u1 = UUID('12345678-1234-5678-1234-567812345678')
+        u2 = UUID('87654321-4321-8765-4321-876543218765')
+        t = Tablet(0, 100, [(u1, 3), (u2, 7)])
+        self.assertEqual(t._replica_dict.get(u1), 3)
+        self.assertEqual(t._replica_dict.get(u2), 7)
+        self.assertIsNone(t._replica_dict.get(UUID('aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee')))
+
+    def test_replica_contains_host_id_uses_dict(self):
+        u1 = UUID('12345678-1234-5678-1234-567812345678')
+        u2 = UUID('87654321-4321-8765-4321-876543218765')
+        t = Tablet(0, 100, [(u1, 3), (u2, 7)])
+        self.assertTrue(t.replica_contains_host_id(u1))
+        self.assertTrue(t.replica_contains_host_id(u2))
+        self.assertFalse(t.replica_contains_host_id(UUID('aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee')))
+
+    def test_replicas_stored_as_tuple(self):
+        t = Tablet(0, 100, [("host1", 0), ("host2", 1)])
+        self.assertIsInstance(t.replicas, tuple)
+
+    def test_replica_dict_from_iterator(self):
+        """Ensure _replica_dict is correctly built even when replicas is a
+        one-shot iterator (generator), not a reusable list."""
+        u1 = UUID('12345678-1234-5678-1234-567812345678')
+        u2 = UUID('87654321-4321-8765-4321-876543218765')
+
+        def gen():
+            yield (u1, 3)
+            yield (u2, 7)
+
+        t = Tablet(0, 100, gen())
+        self.assertEqual(t.replicas, ((u1, 3), (u2, 7)))
+        self.assertEqual(t._replica_dict, {u1: 3, u2: 7})
+        self.assertTrue(t.replica_contains_host_id(u1))
+        self.assertTrue(t.replica_contains_host_id(u2))

From a38a3402aed69b1ecc5e64f612b6399317998869 Mon Sep 17 00:00:00 2001
From: Yaniv Michael Kaul <yaniv.kaul@scylladb.com>
Date: Thu, 9 Apr 2026 14:31:39 +0300
Subject: [PATCH 4/6] perf: streamline Tablet.from_row by inlining validation

Remove the _is_valid_tablet staticmethod indirection and replace the
two-step from_row -> _is_valid_tablet -> Tablet() chain with a single
truthiness guard and direct construction.  Saves ~54 ns/call (12%)
by eliminating a staticmethod descriptor lookup, an extra function
call, and a redundant 'is not None' check (replicas from CQL
deserialization is always a list or None).
---
 cassandra/tablets.py | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/cassandra/tablets.py b/cassandra/tablets.py
index 70caab2a13..afc129a95f 100644
--- a/cassandra/tablets.py
+++ b/cassandra/tablets.py
@@ -33,16 +33,11 @@ def __str__(self):
                % (self.first_token, self.last_token, self.replicas)
     __repr__ = __str__
 
-    @staticmethod
-    def _is_valid_tablet(replicas):
-        return replicas is not None and len(replicas) != 0
-
     @staticmethod
     def from_row(first_token, last_token, replicas):
-        if Tablet._is_valid_tablet(replicas):
-            tablet = Tablet(first_token, last_token, replicas)
-            return tablet
-        return None
+        if not replicas:
+            return None
+        return Tablet(first_token, last_token, replicas)
 
     def replica_contains_host_id(self, uuid: UUID) -> bool:
         return uuid in self._replica_dict

From ee5f840db0576f073ba3c018919aba2c1b69cd50 Mon Sep 17 00:00:00 2001
From: Yaniv Michael Kaul <yaniv.kaul@scylladb.com>
Date: Thu, 9 Apr 2026 14:52:24 +0300
Subject: [PATCH 5/6] perf: eliminate bisect key= callback via parallel token
 index lists

Maintain parallel _first_tokens and _last_tokens dicts alongside
_tablets, each mapping (keyspace, table) to a plain list[int].  This
lets bisect_left run entirely in C on native ints instead of calling
an attrgetter callback on every comparison during binary search.

Follow-up to PR #757, which identified the opportunity: its own
benchmarks showed that bisect_left without key= is 2.7-5.7x faster
than with key=attrgetter.

Results (best-of-5, Python 3.14):

  get_tablet_for_key (hit):
  Tablets    Before    After    Saved   Speedup
       10    293ns    216ns     78ns     1.36x
      100    351ns    233ns    118ns     1.51x
    1,000    448ns    267ns    181ns     1.68x
   10,000    537ns    282ns    255ns     1.90x

All three dicts are kept in sync by add_tablet, drop_tablets, and
drop_tablets_by_host_id.  The attrgetter imports are no longer needed
and have been removed.
---
 cassandra/tablets.py | 56 ++++++++++++++++++++++++++++++--------------
 1 file changed, 39 insertions(+), 17 deletions(-)

diff --git a/cassandra/tablets.py b/cassandra/tablets.py
index afc129a95f..8ebc84c41a 100644
--- a/cassandra/tablets.py
+++ b/cassandra/tablets.py
@@ -1,13 +1,8 @@
 from bisect import bisect_left
-from operator import attrgetter
 from threading import Lock
 from typing import Optional
 from uuid import UUID
 
-# C-accelerated attrgetter avoids per-call lambda allocation overhead
-_get_first_token = attrgetter("first_token")
-_get_last_token = attrgetter("last_token")
-
 
 class Tablet(object):
     """
@@ -45,29 +40,45 @@ def replica_contains_host_id(self, uuid: UUID) -> bool:
 
 class Tablets(object):
     _lock = None
-    _tablets = {}
+    _tablets = {}       # (keyspace, table) -> list[Tablet]
+    _first_tokens = {}  # (keyspace, table) -> list[int]
+    _last_tokens = {}   # (keyspace, table) -> list[int]
 
     def __init__(self, tablets):
         self._tablets = tablets
+        # Build parallel token index lists from any pre-populated data
+        self._first_tokens = {
+            key: [t.first_token for t in tlist]
+            for key, tlist in tablets.items()
+        }
+        self._last_tokens = {
+            key: [t.last_token for t in tlist]
+            for key, tlist in tablets.items()
+        }
         self._lock = Lock()
 
     def table_has_tablets(self, keyspace, table) -> bool:
         return bool(self._tablets.get((keyspace, table), []))
 
     def get_tablet_for_key(self, keyspace, table, t):
-        tablet = self._tablets.get((keyspace, table), [])
-        if not tablet:
+        key = (keyspace, table)
+        last_tokens = self._last_tokens.get(key)
+        if not last_tokens:
             return None
 
-        id = bisect_left(tablet, t.value, key=_get_last_token)
-        if id < len(tablet) and t.value > tablet[id].first_token:
-            return tablet[id]
+        token_value = t.value
+        id = bisect_left(last_tokens, token_value)
+        if id < len(last_tokens) and token_value > self._first_tokens[key][id]:
+            return self._tablets[key][id]
         return None
 
     def drop_tablets(self, keyspace: str, table: Optional[str] = None):
         with self._lock:
             if table is not None:
-                self._tablets.pop((keyspace, table), None)
+                key = (keyspace, table)
+                self._tablets.pop(key, None)
+                self._first_tokens.pop(key, None)
+                self._last_tokens.pop(key, None)
                 return
 
             to_be_deleted = []
@@ -77,6 +88,8 @@ def drop_tablets(self, keyspace: str, table: Optional[str] = None):
 
             for key in to_be_deleted:
                 del self._tablets[key]
+                self._first_tokens.pop(key, None)
+                self._last_tokens.pop(key, None)
 
     def drop_tablets_by_host_id(self, host_id: Optional[UUID]):
         if host_id is None:
@@ -90,23 +103,32 @@ def drop_tablets_by_host_id(self, host_id: Optional[UUID]):
 
                 for tablet_id in reversed(to_be_deleted):
                     tablets.pop(tablet_id)
+                    self._first_tokens[key].pop(tablet_id)
+                    self._last_tokens[key].pop(tablet_id)
 
     def add_tablet(self, keyspace, table, tablet):
         with self._lock:
-            tablets_for_table = self._tablets.setdefault((keyspace, table), [])
+            key = (keyspace, table)
+            tablets_for_table = self._tablets.setdefault(key, [])
+            first_tokens = self._first_tokens.setdefault(key, [])
+            last_tokens = self._last_tokens.setdefault(key, [])
 
             # find first overlapping range
-            start = bisect_left(tablets_for_table, tablet.first_token, key=_get_first_token)
-            if start > 0 and tablets_for_table[start - 1].last_token > tablet.first_token:
+            start = bisect_left(first_tokens, tablet.first_token)
+            if start > 0 and last_tokens[start - 1] > tablet.first_token:
                 start = start - 1
 
             # find last overlapping range
-            end = bisect_left(tablets_for_table, tablet.last_token, key=_get_last_token)
-            if end < len(tablets_for_table) and tablets_for_table[end].first_token >= tablet.last_token:
+            end = bisect_left(last_tokens, tablet.last_token)
+            if end < len(last_tokens) and first_tokens[end] >= tablet.last_token:
                 end = end - 1
 
             if start <= end:
                 del tablets_for_table[start:end + 1]
+                del first_tokens[start:end + 1]
+                del last_tokens[start:end + 1]
 
             tablets_for_table.insert(start, tablet)
+            first_tokens.insert(start, tablet.first_token)
+            last_tokens.insert(start, tablet.last_token)
 

From 3d5cfbd9a9a09acac2956366d878b5e44d98964c Mon Sep 17 00:00:00 2001
From: Yaniv Michael Kaul <yaniv.kaul@scylladb.com>
Date: Thu, 9 Apr 2026 20:32:21 +0300
Subject: [PATCH 6/6] perf: batch-filter drop_tablets_by_host_id instead of
 triple pop

Replace the per-tablet reversed pop() loop (O(k*n) for each of three
parallel lists) with a single-pass index filter that rebuilds the
lists once.  This avoids repeated list element shifting and scales
better when many tablets are dropped at once.

Benchmark (3 replicas/tablet, ~33% dropped):
  Tablets   Old (triple-pop)   New (batch-filter)   Speedup
     100          123 us             128 us           ~1.0x
   1,000        1,375 us           1,113 us           1.24x
  10,000       25,429 us          13,079 us           1.94x

Add 3 unit tests for drop_tablets_by_host_id covering matching,
None host_id, and nonexistent host_id.
---
 cassandra/tablets.py       | 19 ++++++++++---------
 tests/unit/test_tablets.py | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 9 deletions(-)

diff --git a/cassandra/tablets.py b/cassandra/tablets.py
index 8ebc84c41a..3f81334688 100644
--- a/cassandra/tablets.py
+++ b/cassandra/tablets.py
@@ -96,15 +96,16 @@ def drop_tablets_by_host_id(self, host_id: Optional[UUID]):
             return
         with self._lock:
             for key, tablets in self._tablets.items():
-                to_be_deleted = []
-                for tablet_id, tablet in enumerate(tablets):
-                    if tablet.replica_contains_host_id(host_id):
-                        to_be_deleted.append(tablet_id)
-
-                for tablet_id in reversed(to_be_deleted):
-                    tablets.pop(tablet_id)
-                    self._first_tokens[key].pop(tablet_id)
-                    self._last_tokens[key].pop(tablet_id)
+                # Filter in one pass instead of popping one-by-one (O(n) vs O(k*n))
+                keep = [i for i, t in enumerate(tablets)
+                        if not t.replica_contains_host_id(host_id)]
+                if len(keep) == len(tablets):
+                    continue  # nothing to drop
+                self._tablets[key] = [tablets[i] for i in keep]
+                first = self._first_tokens[key]
+                last = self._last_tokens[key]
+                self._first_tokens[key] = [first[i] for i in keep]
+                self._last_tokens[key] = [last[i] for i in keep]
 
     def add_tablet(self, keyspace, table, tablet):
         with self._lock:
diff --git a/tests/unit/test_tablets.py b/tests/unit/test_tablets.py
index 0a9cfe355a..d0e09527bc 100644
--- a/tests/unit/test_tablets.py
+++ b/tests/unit/test_tablets.py
@@ -184,3 +184,38 @@ def gen():
         self.assertEqual(t._replica_dict, {u1: 3, u2: 7})
         self.assertTrue(t.replica_contains_host_id(u1))
         self.assertTrue(t.replica_contains_host_id(u2))
+
+
+class DropTabletsByHostIdTest(unittest.TestCase):
+    """Tests for Tablets.drop_tablets_by_host_id batch-filter path."""
+
+    def test_drop_removes_matching_tablets(self):
+        u1 = UUID('12345678-1234-5678-1234-567812345678')
+        u2 = UUID('87654321-4321-8765-4321-876543218765')
+        t1 = Tablet(0, 100, [(u1, 0)])
+        t2 = Tablet(100, 200, [(u2, 0)])
+        t3 = Tablet(200, 300, [(u1, 1), (u2, 1)])
+        tablets = Tablets({("ks", "tb"): [t1, t2, t3]})
+
+        tablets.drop_tablets_by_host_id(u1)
+
+        remaining = tablets._tablets[("ks", "tb")]
+        self.assertEqual(len(remaining), 1)
+        self.assertIs(remaining[0], t2)
+        # Verify token index lists are in sync
+        self.assertEqual(tablets._first_tokens[("ks", "tb")], [100])
+        self.assertEqual(tablets._last_tokens[("ks", "tb")], [200])
+
+    def test_drop_none_host_id_is_noop(self):
+        t1 = Tablet(0, 100, [("host1", 0)])
+        tablets = Tablets({("ks", "tb"): [t1]})
+        tablets.drop_tablets_by_host_id(None)
+        self.assertEqual(len(tablets._tablets[("ks", "tb")]), 1)
+
+    def test_drop_nonexistent_host_id_is_noop(self):
+        u1 = UUID('12345678-1234-5678-1234-567812345678')
+        u_missing = UUID('aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee')
+        t1 = Tablet(0, 100, [(u1, 0)])
+        tablets = Tablets({("ks", "tb"): [t1]})
+        tablets.drop_tablets_by_host_id(u_missing)
+        self.assertEqual(len(tablets._tablets[("ks", "tb")]), 1)