From 838ac277545a4fa1fcdc52ed1ec1e6a3356bc586 Mon Sep 17 00:00:00 2001
From: Adrien Cacciaguerra
Date: Wed, 13 May 2026 08:54:52 -0700
Subject: [PATCH 1/5] chore: commit for adriencaccia

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 44e0723..cc89b5a 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Python Performance Lab: Sharpening Your Instincts
+# Python Performance Lab: Sharpening Your Instincts - `adriencaccia`
 
 A PyCon US 2026 hands-on tutorial. You optimize intentionally slow Python code
 across three rounds plus a team challenge, measuring every change with

From cafc8ef6635861ef283c21f9922a871d22d462a5 Mon Sep 17 00:00:00 2001
From: Adrien Cacciaguerra
Date: Wed, 13 May 2026 10:12:00 -0700
Subject: [PATCH 2/5] use matrix instead of dict

---
 rounds/1_histogram/solution.py | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/rounds/1_histogram/solution.py b/rounds/1_histogram/solution.py
index dffbee5..63d7aa7 100644
--- a/rounds/1_histogram/solution.py
+++ b/rounds/1_histogram/solution.py
@@ -8,7 +8,21 @@
 
 def compute_histogram(path: str) -> dict[bytes, int]:
     """Frequency of every 2-byte bigram in the file at ``path``."""
-    # TODO: remove this delegation and write your own implementation here.
-    from .baseline import compute_histogram as _baseline
+    # Step 1: read the whole file into memory as a single bytes object.
+    with open(path, "rb") as f:
+        data = f.read()
 
-    return _baseline(path)
+    # Step 2: count bigrams in a 256x256 matrix indexed by byte values.
+    counts = [[0] * 256 for _ in range(256)]
+
+    for i in range(len(data) - 1):
+        # Increment the cell addressed by the bigram starting at offset ``i``.
+        counts[data[i]][data[i + 1]] += 1
+
+    # Step 3: convert the matrix back to the ``dict[bytes, int]`` output format.
+    output = {}
+    for i in range(256):
+        for j in range(256):
+            if counts[i][j] > 0:
+                output[bytes([i, j])] = counts[i][j]
+    return output

From 280ea702e737a6b20cd2833a823faee599a19238 Mon Sep 17 00:00:00 2001
From: Adrien Cacciaguerra
Date: Wed, 13 May 2026 11:17:26 -0700
Subject: [PATCH 3/5] part3: threadpool

---
 rounds/3_dna/solution.py | 95 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 89 insertions(+), 6 deletions(-)

diff --git a/rounds/3_dna/solution.py b/rounds/3_dna/solution.py
index 8b917da..a0c6042 100644
--- a/rounds/3_dna/solution.py
+++ b/rounds/3_dna/solution.py
@@ -5,13 +5,96 @@
 own faster implementation.
 """
 
-from .baseline import find_matches as _baseline
+from concurrent.futures import ThreadPoolExecutor
+from threading import Thread
+
+
+def find_matches_in_sequence(
+    record_id: str,
+    sequence: str,
+    pattern_str: str,
+    matches: list[tuple[str, list[int]]],
+):
+    """Find matches in a single sequence and append to the shared matches list."""
+    positions: list[int] = []
+    start = 0
+    while True:
+        pos = sequence.find(pattern_str, start)
+        if pos == -1:
+            break
+        positions.append(pos)
+        start = pos + 1
+
+    if positions:
+        matches.append((record_id, positions))
+
+
+def find_matches_many_threads(
+    fasta_path: str, pattern: bytes
+) -> list[tuple[str, list[int]]]:
+    # Step 1: read the whole FASTA file as text and decode the pattern so the
+    # search below can use a single ``str`` API.
+    pattern_str = pattern.decode("ascii")
+    with open(fasta_path, "r") as f:
+        text = f.read()
+
+    matches: list[tuple[str, list[int]]] = []
+
+    # Preprocess the sequences
+    sequences = []
+    for record in text.split(">"):
+        if not record.strip():
+            continue
+        lines = record.split("\n")
+        record_id = lines[0].strip()
+        sequence = "".join(lines[1:]).replace(" ", "")
+        sequences.append((record_id, sequence))
+    threads = []
+    for record_id, sequence in sequences:
+        thread = Thread(
+            target=find_matches_in_sequence,
+            args=(record_id, sequence, pattern_str, matches),
+        )
+        thread.start()
+        threads.append(thread)
+    # Wait for all threads to finish
+    print(f"Waiting for {len(threads)} threads to finish...")
+    for thread in threads:
+        thread.join()
+
+    return matches
 
 
 def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]:
-    """Find every FASTA record whose sequence contains ``pattern``.
+    # Step 1: read the whole FASTA file as text and decode the pattern so the
+    # search below can use a single ``str`` API.
+    pattern_str = pattern.decode("ascii")
+    with open(fasta_path, "r") as f:
+        text = f.read()
+
+    matches: list[tuple[str, list[int]]] = []
+
+    # Preprocess the sequences
+    sequences = []
+    for record in text.split(">"):
+        if not record.strip():
+            continue
+        lines = record.split("\n")
+        record_id = lines[0].strip()
+        sequence = "".join(lines[1:]).replace(" ", "")
+        sequences.append((record_id, sequence))
+
+    # Create a pool of threads
+    pool = ThreadPoolExecutor(max_workers=len(sequences))
+    for record_id, sequence in sequences:
+        pool.submit(
+            find_matches_in_sequence,
+            record_id,
+            sequence,
+            pattern_str,
+            matches,
+        )
+    # Wait for all threads to finish
+    pool.shutdown(wait=True)
 
-    Returns ``[(record_id, [positions...]), ...]`` in file order.
-    """
-    # TODO: remove this delegation and write your own implementation here.
-    return _baseline(fasta_path, pattern)
+    return matches

From 0d70ff63576a40477ab16ac315da983e4a6eaacf Mon Sep 17 00:00:00 2001
From: Adrien Cacciaguerra
Date: Wed, 13 May 2026 11:28:00 -0700
Subject: [PATCH 4/5] part3: threadpool fewer threads

---
 .python-version          |  2 +-
 rounds/3_dna/solution.py | 11 ++++++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/.python-version b/.python-version
index d5629d4..93a848f 100644
--- a/.python-version
+++ b/.python-version
@@ -1 +1 @@
-3.15t
+3.15
diff --git a/rounds/3_dna/solution.py b/rounds/3_dna/solution.py
index a0c6042..89cc005 100644
--- a/rounds/3_dna/solution.py
+++ b/rounds/3_dna/solution.py
@@ -85,7 +85,8 @@ def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]
         sequences.append((record_id, sequence))
 
     # Create a pool of threads
-    pool = ThreadPoolExecutor(max_workers=len(sequences))
+    pool = ThreadPoolExecutor(max_workers=16)
+
     for record_id, sequence in sequences:
         pool.submit(
             find_matches_in_sequence,
@@ -94,6 +95,14 @@
             pattern_str,
             matches,
         )
+    # Or
+    # pool.map(
+    #     lambda args: find_matches_in_sequence(*args),
+    #     [
+    #         (record_id, sequence, pattern_str, matches)
+    #         for record_id, sequence in sequences
+    #     ],
+    # )
     # Wait for all threads to finish
     pool.shutdown(wait=True)
 

From de6ea69715d6e6ac5ad61ef35b0e1281e2551973 Mon Sep 17 00:00:00 2001
From: Adrien Cacciaguerra
Date: Wed, 13 May 2026 12:05:43 -0700
Subject: [PATCH 5/5] part3: use bytes and batches

---
 rounds/3_dna/solution.py | 171 +++++++++++++++++----------------------
 1 file changed, 73 insertions(+), 98 deletions(-)

diff --git a/rounds/3_dna/solution.py b/rounds/3_dna/solution.py
index 89cc005..4507c2d 100644
--- a/rounds/3_dna/solution.py
+++ b/rounds/3_dna/solution.py
@@ -5,105 +5,80 @@
 own faster implementation.
 """
 
+from __future__ import annotations
+
+import os
 from concurrent.futures import ThreadPoolExecutor
-from threading import Thread
-
-
-def find_matches_in_sequence(
-    record_id: str,
-    sequence: str,
-    pattern_str: str,
-    matches: list[tuple[str, list[int]]],
-):
-    """Find matches in a single sequence and append to the shared matches list."""
-    positions: list[int] = []
-    start = 0
-    while True:
-        pos = sequence.find(pattern_str, start)
-        if pos == -1:
-            break
-        positions.append(pos)
-        start = pos + 1
-
-    if positions:
-        matches.append((record_id, positions))
-
-
-def find_matches_many_threads(
-    fasta_path: str, pattern: bytes
-) -> list[tuple[str, list[int]]]:
-    # Step 1: read the whole FASTA file as text and decode the pattern so the
-    # search below can use a single ``str`` API.
-    pattern_str = pattern.decode("ascii")
-    with open(fasta_path, "r") as f:
-        text = f.read()
-
-    matches: list[tuple[str, list[int]]] = []
-
-    # Preprocess the sequences
-    sequences = []
-    for record in text.split(">"):
-        if not record.strip():
-            continue
-        lines = record.split("\n")
-        record_id = lines[0].strip()
-        sequence = "".join(lines[1:]).replace(" ", "")
-        sequences.append((record_id, sequence))
-    threads = []
-    for record_id, sequence in sequences:
-        thread = Thread(
-            target=find_matches_in_sequence,
-            args=(record_id, sequence, pattern_str, matches),
-        )
-        thread.start()
-        threads.append(thread)
-    # Wait for all threads to finish
-    print(f"Waiting for {len(threads)} threads to finish...")
-    for thread in threads:
-        thread.join()
-
-    return matches
+
+_NL = 0x0A  # b"\n"
 
 
 def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]:
-    # Step 1: read the whole FASTA file as text and decode the pattern so the
-    # search below can use a single ``str`` API.
-    pattern_str = pattern.decode("ascii")
-    with open(fasta_path, "r") as f:
-        text = f.read()
-
-    matches: list[tuple[str, list[int]]] = []
-
-    # Preprocess the sequences
-    sequences = []
-    for record in text.split(">"):
-        if not record.strip():
-            continue
-        lines = record.split("\n")
-        record_id = lines[0].strip()
-        sequence = "".join(lines[1:]).replace(" ", "")
-        sequences.append((record_id, sequence))
-
-    # Create a pool of threads
-    pool = ThreadPoolExecutor(max_workers=16)
-
-    for record_id, sequence in sequences:
-        pool.submit(
-            find_matches_in_sequence,
-            record_id,
-            sequence,
-            pattern_str,
-            matches,
-        )
-    # Or
-    # pool.map(
-    #     lambda args: find_matches_in_sequence(*args),
-    #     [
-    #         (record_id, sequence, pattern_str, matches)
-    #         for record_id, sequence in sequences
-    #     ],
-    # )
-    # Wait for all threads to finish
-    pool.shutdown(wait=True)
-
-    return matches
+    with open(fasta_path, "rb") as f:
+        data = f.read()
+
+    # Step 1: locate every record start. A record starts with ``>`` either at
+    # offset 0 or immediately after a ``\n``.
+    starts: list[int] = []
+    i = 0
+    while True:
+        p = data.find(b">", i)
+        if p == -1:
+            break
+        if p == 0 or data[p - 1] == _NL:
+            starts.append(p)
+        i = p + 1
+    starts.append(len(data))  # sentinel marking the end of the last record.
+
+    num_records = len(starts) - 1
+    if num_records <= 0:
+        return []
+
+    # Step 2: parallel scan. Choose enough batches to keep workers balanced
+    # even when record sizes vary.
+    n_workers = max(1, os.cpu_count() or 1)
+    batches = max(1, n_workers * 4)
+    batch_size = max(1, (num_records + batches - 1) // batches)
+
+    def scan_batch(start_idx: int, end_idx: int) -> list[tuple[int, str, list[int]]]:
+        out: list[tuple[int, str, list[int]]] = []
+        for j in range(start_idx, end_idx):
+            rec_start = starts[j]
+            rec_end = starts[j + 1]
+
+            # Locate the end of the header line within this record's slice.
+            nl = data.find(b"\n", rec_start, rec_end)
+            if nl <= rec_start:
+                continue  # Malformed or header-only.
+
+            record_id = data[rec_start + 1 : nl].decode("ascii").strip()
+
+            # Contiguous sequence: drop the newlines so matches that straddle
+            # line breaks are still found by ``bytes.find``.
+            sequence = data[nl + 1 : rec_end].replace(b"\n", b"")
+
+            positions: list[int] = []
+            s = 0
+            while True:
+                p = sequence.find(pattern, s)
+                if p == -1:
+                    break
+                positions.append(p)
+                s = p + 1
+
+            if positions:
+                out.append((j, record_id, positions))
+        return out
+
+    with ThreadPoolExecutor(max_workers=n_workers) as pool:
+        futures = [
+            pool.submit(scan_batch, lo, min(lo + batch_size, num_records))
+            for lo in range(0, num_records, batch_size)
+        ]
+        chunks = [f.result() for f in futures]
+
+    # Step 3: flatten the per-batch results. ``futures`` is iterated in
+    # submission order, so the final sort by record index is only a safety net.
+    flat = [item for chunk in chunks for item in chunk]
+    flat.sort(key=lambda triple: triple[0])
+    return [(rid, positions) for _, rid, positions in flat]
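
Follow-up sketches (not applied by this series; helper names, paths, and
parameters below are illustrative, not from the repo).

Re PATCH 2/5: ``collections.Counter`` can consume the byte pairs directly and
trade the 256x256 matrix for a single C-level counting pass. A minimal,
unbenchmarked sketch (``compute_histogram_counter`` is a hypothetical name):

from collections import Counter

def compute_histogram_counter(path: str) -> dict[bytes, int]:
    # Sketch of an alternative to the matrix approach in PATCH 2/5.
    with open(path, "rb") as f:
        data = f.read()
    # Iterating bytes yields ints, so zip() pairs each byte with its successor.
    pairs = Counter(zip(data, data[1:]))
    return {bytes((a, b)): n for (a, b), n in pairs.items()}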
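
Re PATCH 4/5: note that .python-version moves from 3.15t (free-threaded) to
3.15 (GIL build), which changes how much the thread pool can overlap CPU-bound
scanning. A small probe worth running before timing anything, using
sys._is_gil_enabled() (available since CPython 3.13):

import sys

def gil_status() -> str:
    # Report which interpreter build is actually running the benchmark.
    probe = getattr(sys, "_is_gil_enabled", None)
    if probe is None:
        return "pre-3.13 build (GIL always on)"
    return "GIL enabled" if probe() else "free-threaded (GIL disabled)"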
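
Re PATCH 5/5: the inner while/find loop can also be written as a zero-width
lookahead, which reports overlapping occurrences in one C-level regex pass. A
sketch with the same overlap semantics as the manual loop
(``positions_via_regex`` is a hypothetical helper):

import re

def positions_via_regex(sequence: bytes, pattern: bytes) -> list[int]:
    # The lookahead matches at every start of an occurrence, overlaps included.
    probe = re.compile(b"(?=" + re.escape(pattern) + b")")
    return [m.start() for m in probe.finditer(sequence)]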
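
Also re PATCH 5/5: if the FASTA inputs outgrow memory, mmap keeps the same
raw-offset scanning without copying the file up front, since mmap objects
support .find(). A sketch, assuming a read-only mapping is available
(``find_raw_offsets`` is a hypothetical helper):

import mmap

def find_raw_offsets(path: str, pattern: bytes) -> list[int]:
    # Scan the page-cache-backed mapping directly instead of an in-memory copy.
    with open(path, "rb") as f:
        with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
            out: list[int] = []
            i = mm.find(pattern)
            while i != -1:
                out.append(i)
                i = mm.find(pattern, i + 1)
            return out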
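
Finally, since the tutorial asks for a measurement with every change, a
minimal best-of-N harness (hypothetical; the tutorial's own tooling may
differ, and data/sample.fasta is an illustrative path):

import time

def bench(fn, *args, repeat: int = 5) -> float:
    # Best-of-N wall-clock seconds for one call; taking the minimum damps
    # scheduler and cache noise between repeats.
    best = float("inf")
    for _ in range(repeat):
        t0 = time.perf_counter()
        fn(*args)
        best = min(best, time.perf_counter() - t0)
    return best

# e.g. bench(find_matches, "data/sample.fasta", b"ACGT")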