From 838ac277545a4fa1fcdc52ed1ec1e6a3356bc586 Mon Sep 17 00:00:00 2001
From: Adrien Cacciaguerra
Date: Wed, 13 May 2026 08:54:52 -0700
Subject: [PATCH 1/5] chore: commit for adriencaccia

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 44e0723..cc89b5a 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Python Performance Lab: Sharpening Your Instincts
+# Python Performance Lab: Sharpening Your Instincts - `adriencaccia`
 
 A PyCon US 2026 hands-on tutorial. You optimize intentionally slow Python code
 across three rounds plus a team challenge, measuring every change with

From cafc8ef6635861ef283c21f9922a871d22d462a5 Mon Sep 17 00:00:00 2001
From: Adrien Cacciaguerra
Date: Wed, 13 May 2026 10:12:00 -0700
Subject: [PATCH 2/5] use matrix instead of dict

---
 rounds/1_histogram/solution.py | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/rounds/1_histogram/solution.py b/rounds/1_histogram/solution.py
index dffbee5..63d7aa7 100644
--- a/rounds/1_histogram/solution.py
+++ b/rounds/1_histogram/solution.py
@@ -8,7 +8,21 @@
 
 def compute_histogram(path: str) -> dict[bytes, int]:
     """Frequency of every 2-byte bigram in the file at ``path``."""
-    # TODO: remove this delegation and write your own implementation here.
-    from .baseline import compute_histogram as _baseline
+    # Step 1: read the whole file into memory as a single bytes object.
+    with open(path, "rb") as f:
+        data = f.read()
 
-    return _baseline(path)
+    # Step 2: count bigrams in a 256x256 matrix indexed by byte values.
+    counts = [[0] * 256 for _ in range(256)]
+
+    for i in range(len(data) - 1):
+        # Increment the cell addressed by the bigram starting at offset ``i``.
+        counts[data[i]][data[i + 1]] += 1
+
+    # Step 3: convert the matrix back to the ``dict[bytes, int]`` output format.
+    output = {}
+    for i in range(256):
+        for j in range(256):
+            if counts[i][j] > 0:
+                output[bytes([i, j])] = counts[i][j]
+    return output

From 280ea702e737a6b20cd2833a823faee599a19238 Mon Sep 17 00:00:00 2001
From: Adrien Cacciaguerra
Date: Wed, 13 May 2026 11:17:26 -0700
Subject: [PATCH 3/5] part3: threadpool

---
 rounds/3_dna/solution.py | 95 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 89 insertions(+), 6 deletions(-)

diff --git a/rounds/3_dna/solution.py b/rounds/3_dna/solution.py
index 8b917da..a0c6042 100644
--- a/rounds/3_dna/solution.py
+++ b/rounds/3_dna/solution.py
@@ -5,13 +5,96 @@
 own faster implementation.
 """
 
-from .baseline import find_matches as _baseline
+from concurrent.futures import ThreadPoolExecutor
+from threading import Thread
+
+
+def find_matches_in_sequence(
+    record_id: str,
+    sequence: str,
+    pattern_str: str,
+    matches: list[tuple[str, list[int]]],
+):
+    """Find matches in a single sequence and append to the shared matches list."""
+    positions: list[int] = []
+    start = 0
+    while True:
+        pos = sequence.find(pattern_str, start)
+        if pos == -1:
+            break
+        positions.append(pos)
+        start = pos + 1
+
+    if positions:
+        matches.append((record_id, positions))
+
+
+def find_matches_many_threads(
+    fasta_path: str, pattern: bytes
+) -> list[tuple[str, list[int]]]:
+    # Step 1: read the whole FASTA file as text and decode the pattern so the
+    # search below can use a single ``str`` API.
+    pattern_str = pattern.decode("ascii")
+    with open(fasta_path, "r") as f:
+        text = f.read()
+
+    matches: list[tuple[str, list[int]]] = []
+
+    # Preprocess the sequences
+    sequences = []
+    for record in text.split(">"):
+        if not record.strip():
+            continue
+        lines = record.split("\n")
+        record_id = lines[0].strip()
+        sequence = "".join(lines[1:]).replace(" ", "")
+        sequences.append((record_id, sequence))
+    threads = []
+    for record_id, sequence in sequences:
+        thread = Thread(
+            target=find_matches_in_sequence,
+            args=(record_id, sequence, pattern_str, matches),
+        )
+        thread.start()
+        threads.append(thread)
+    # Wait for all threads to finish
+    print(f"Waiting for {len(threads)} threads to finish...")
+    for thread in threads:
+        thread.join()
+
+    return matches
 
 
 def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]:
-    """Find every FASTA record whose sequence contains ``pattern``.
+    # Step 1: read the whole FASTA file as text and decode the pattern so the
+    # search below can use a single ``str`` API.
+    pattern_str = pattern.decode("ascii")
+    with open(fasta_path, "r") as f:
+        text = f.read()
+
+    matches: list[tuple[str, list[int]]] = []
+
+    # Preprocess the sequences
+    sequences = []
+    for record in text.split(">"):
+        if not record.strip():
+            continue
+        lines = record.split("\n")
+        record_id = lines[0].strip()
+        sequence = "".join(lines[1:]).replace(" ", "")
+        sequences.append((record_id, sequence))
+
+    # Create a pool of threads
+    pool = ThreadPoolExecutor(max_workers=len(sequences))
+    for record_id, sequence in sequences:
+        pool.submit(
+            find_matches_in_sequence,
+            record_id,
+            sequence,
+            pattern_str,
+            matches,
+        )
+    # Wait for all threads to finish
+    pool.shutdown(wait=True)
 
-    Returns ``[(record_id, [positions...]), ...]`` in file order.
-    """
-    # TODO: remove this delegation and write your own implementation here.
-    return _baseline(fasta_path, pattern)
+    return matches

From 0d70ff63576a40477ab16ac315da983e4a6eaacf Mon Sep 17 00:00:00 2001
From: Adrien Cacciaguerra
Date: Wed, 13 May 2026 11:28:00 -0700
Subject: [PATCH 4/5] part3: threadpool fewer threads

---
 .python-version          |  2 +-
 rounds/3_dna/solution.py | 11 ++++++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/.python-version b/.python-version
index d5629d4..93a848f 100644
--- a/.python-version
+++ b/.python-version
@@ -1 +1 @@
-3.15t
+3.15
diff --git a/rounds/3_dna/solution.py b/rounds/3_dna/solution.py
index a0c6042..89cc005 100644
--- a/rounds/3_dna/solution.py
+++ b/rounds/3_dna/solution.py
@@ -85,7 +85,8 @@ def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]
         sequences.append((record_id, sequence))
 
     # Create a pool of threads
-    pool = ThreadPoolExecutor(max_workers=len(sequences))
+    pool = ThreadPoolExecutor(max_workers=16)
+
     for record_id, sequence in sequences:
         pool.submit(
             find_matches_in_sequence,
@@ -94,6 +95,14 @@
             pattern_str,
             matches,
         )
+    # Or
+    # pool.map(
+    #     lambda args: find_matches_in_sequence(*args),
+    #     [
+    #         (record_id, sequence, pattern_str, matches)
+    #         for record_id, sequence in sequences
+    #     ],
+    # )
     # Wait for all threads to finish
     pool.shutdown(wait=True)
 

From de6ea69715d6e6ac5ad61ef35b0e1281e2551973 Mon Sep 17 00:00:00 2001
From: Adrien Cacciaguerra
Date: Wed, 13 May 2026 12:05:43 -0700
Subject: [PATCH 5/5] part3: use bytes and batches

---
 rounds/3_dna/solution.py | 171 +++++++++++++++++----------------------
 1 file changed, 73 insertions(+), 98 deletions(-)

diff --git a/rounds/3_dna/solution.py b/rounds/3_dna/solution.py
index 89cc005..4507c2d 100644
--- a/rounds/3_dna/solution.py
+++ b/rounds/3_dna/solution.py
@@ -5,105 +5,80 @@
 own faster implementation.
 """
 
+from __future__ import annotations
+
+import os
 from concurrent.futures import ThreadPoolExecutor
-from threading import Thread
-
-
-def find_matches_in_sequence(
-    record_id: str,
-    sequence: str,
-    pattern_str: str,
-    matches: list[tuple[str, list[int]]],
-):
-    """Find matches in a single sequence and append to the shared matches list."""
-    positions: list[int] = []
-    start = 0
-    while True:
-        pos = sequence.find(pattern_str, start)
-        if pos == -1:
-            break
-        positions.append(pos)
-        start = pos + 1
-
-    if positions:
-        matches.append((record_id, positions))
-
-
-def find_matches_many_threads(
-    fasta_path: str, pattern: bytes
-) -> list[tuple[str, list[int]]]:
-    # Step 1: read the whole FASTA file as text and decode the pattern so the
-    # search below can use a single ``str`` API.
-    pattern_str = pattern.decode("ascii")
-    with open(fasta_path, "r") as f:
-        text = f.read()
-
-    matches: list[tuple[str, list[int]]] = []
-
-    # Preprocess the sequences
-    sequences = []
-    for record in text.split(">"):
-        if not record.strip():
-            continue
-        lines = record.split("\n")
-        record_id = lines[0].strip()
-        sequence = "".join(lines[1:]).replace(" ", "")
-        sequences.append((record_id, sequence))
-    threads = []
-    for record_id, sequence in sequences:
-        thread = Thread(
-            target=find_matches_in_sequence,
-            args=(record_id, sequence, pattern_str, matches),
-        )
-        thread.start()
-        threads.append(thread)
-    # Wait for all threads to finish
-    print(f"Waiting for {len(threads)} threads to finish...")
-    for thread in threads:
-        thread.join()
-
-    return matches
+
+_NL = 0x0A  # b"\n"
 
 
 def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]:
-    # Step 1: read the whole FASTA file as text and decode the pattern so the
-    # search below can use a single ``str`` API.
-    pattern_str = pattern.decode("ascii")
-    with open(fasta_path, "r") as f:
-        text = f.read()
-
-    matches: list[tuple[str, list[int]]] = []
-
-    # Preprocess the sequences
-    sequences = []
-    for record in text.split(">"):
-        if not record.strip():
-            continue
-        lines = record.split("\n")
-        record_id = lines[0].strip()
-        sequence = "".join(lines[1:]).replace(" ", "")
-        sequences.append((record_id, sequence))
-
-    # Create a pool of threads
-    pool = ThreadPoolExecutor(max_workers=16)
-
-    for record_id, sequence in sequences:
-        pool.submit(
-            find_matches_in_sequence,
-            record_id,
-            sequence,
-            pattern_str,
-            matches,
-        )
-    # Or
-    # pool.map(
-    #     lambda args: find_matches_in_sequence(*args),
-    #     [
-    #         (record_id, sequence, pattern_str, matches)
-    #         for record_id, sequence in sequences
-    #     ],
-    # )
-    # Wait for all threads to finish
-    pool.shutdown(wait=True)
-
-    return matches
+    with open(fasta_path, "rb") as f:
+        data = f.read()
+
+    # Step 1: locate every record start. A record starts with ``>`` either at
+    # offset 0 or immediately after a ``\n``.
+    starts: list[int] = []
+    i = 0
+    while True:
+        p = data.find(b">", i)
+        if p == -1:
+            break
+        if p == 0 or data[p - 1] == _NL:
+            starts.append(p)
+        i = p + 1
+    starts.append(len(data))  # sentinel marking the end of the last record.
+
+    num_records = len(starts) - 1
+    if num_records <= 0:
+        return []
+
+    # Step 2: parallel scan. Choose enough batches to keep workers balanced
+    # even when record sizes vary.
+    n_workers = max(1, os.cpu_count() or 1)
+    batches = max(1, n_workers * 4)
+    batch_size = max(1, (num_records + batches - 1) // batches)
+
+    def scan_batch(start_idx: int, end_idx: int) -> list[tuple[int, str, list[int]]]:
+        out: list[tuple[int, str, list[int]]] = []
+        for j in range(start_idx, end_idx):
+            rec_start = starts[j]
+            rec_end = starts[j + 1]
+
+            # Locate the end of the header line within this record's slice.
+            nl = data.find(b"\n", rec_start, rec_end)
+            if nl <= rec_start:
+                continue  # Malformed or header-only.
+
+            record_id = data[rec_start + 1 : nl].decode("ascii").strip()
+
+            # Contiguous sequence: drop the newlines so matches that straddle
+            # line breaks are still found by ``bytes.find``.
+            sequence = data[nl + 1 : rec_end].replace(b"\n", b"")
+
+            positions: list[int] = []
+            s = 0
+            while True:
+                p = sequence.find(pattern, s)
+                if p == -1:
+                    break
+                positions.append(p)
+                s = p + 1
+
+            if positions:
+                out.append((j, record_id, positions))
+        return out
+
+    with ThreadPoolExecutor(max_workers=n_workers) as pool:
+        futures = [
+            pool.submit(scan_batch, lo, min(lo + batch_size, num_records))
+            for lo in range(0, num_records, batch_size)
+        ]
+        chunks = [f.result() for f in futures]
+
+    # Step 3: flatten the per-batch results. ``futures`` is iterated in
+    # submission order, so the final sort by record index is only a safety net.
+    flat = [item for chunk in chunks for item in chunk]
+    flat.sort(key=lambda triple: triple[0])
+    return [(rid, positions) for _, rid, positions in flat]
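
Follow-up sketches (not applied by this series; helper names, paths, and
parameters below are illustrative, not from the repo).

Re PATCH 2/5: ``collections.Counter`` can consume the byte pairs directly and
trade the 256x256 matrix for a single C-level counting pass. A minimal,
unbenchmarked sketch (``compute_histogram_counter`` is a hypothetical name):

from collections import Counter

def compute_histogram_counter(path: str) -> dict[bytes, int]:
    # Sketch of an alternative to the matrix approach in PATCH 2/5.
    with open(path, "rb") as f:
        data = f.read()
    # Iterating bytes yields ints, so zip() pairs each byte with its successor.
    pairs = Counter(zip(data, data[1:]))
    return {bytes((a, b)): n for (a, b), n in pairs.items()}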
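
Re PATCH 4/5: note that .python-version moves from 3.15t (free-threaded) to
3.15 (GIL build), which changes how much the thread pool can overlap CPU-bound
scanning. A small probe worth running before timing anything, using
sys._is_gil_enabled() (available since CPython 3.13):

import sys

def gil_status() -> str:
    # Report which interpreter build is actually running the benchmark.
    probe = getattr(sys, "_is_gil_enabled", None)
    if probe is None:
        return "pre-3.13 build (GIL always on)"
    return "GIL enabled" if probe() else "free-threaded (GIL disabled)"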
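
Re PATCH 5/5: the inner while/find loop can also be written as a zero-width
lookahead, which reports overlapping occurrences in one C-level regex pass. A
sketch with the same overlap semantics as the manual loop
(``positions_via_regex`` is a hypothetical helper):

import re

def positions_via_regex(sequence: bytes, pattern: bytes) -> list[int]:
    # The lookahead matches at every start of an occurrence, overlaps included.
    probe = re.compile(b"(?=" + re.escape(pattern) + b")")
    return [m.start() for m in probe.finditer(sequence)]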
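
Also re PATCH 5/5: if the FASTA inputs outgrow memory, mmap keeps the same
raw-offset scanning without copying the file up front, since mmap objects
support .find(). A sketch, assuming a read-only mapping is available
(``find_raw_offsets`` is a hypothetical helper):

import mmap

def find_raw_offsets(path: str, pattern: bytes) -> list[int]:
    # Scan the page-cache-backed mapping directly instead of an in-memory copy.
    with open(path, "rb") as f:
        with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
            out: list[int] = []
            i = mm.find(pattern)
            while i != -1:
                out.append(i)
                i = mm.find(pattern, i + 1)
            return out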
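
Finally, since the tutorial asks for a measurement with every change, a
minimal best-of-N harness (hypothetical; the tutorial's own tooling may
differ, and data/sample.fasta is an illustrative path):

import time

def bench(fn, *args, repeat: int = 5) -> float:
    # Best-of-N wall-clock seconds for one call; taking the minimum damps
    # scheduler and cache noise between repeats.
    best = float("inf")
    for _ in range(repeat):
        t0 = time.perf_counter()
        fn(*args)
        best = min(best, time.perf_counter() - t0)
    return best

# e.g. bench(find_matches, "data/sample.fasta", b"ACGT")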