Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,4 @@ scripts/
```

Each round's `data/` directory is generated locally and gitignored.
This is Venkat's PR
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ version = "0.1.0"
description = "Python Performance Lab: Sharpening Your Instincts — PyCon US 2026 tutorial"
readme = "README.md"
requires-python = ">=3.13"
dependencies = ["numpy>=2.0"]
dependencies = [
"numpy>=2.0",
"regex>=2026.5.9",
]

[dependency-groups]
dev = ["pytest>=8.0", "pytest-codspeed>=5.0.1"]
Expand Down
13 changes: 10 additions & 3 deletions rounds/1_histogram/solution.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,14 @@

def compute_histogram(path: str) -> dict[bytes, int]:
    """Frequency of every 2-byte bigram in the file at ``path``.

    Reads the whole file as binary, tallies each adjacent byte pair in a
    256x256 count matrix (integer-indexed, so the hot loop avoids dict
    hashing), then converts only the non-zero cells into a dict.

    Returns:
        Mapping of each occurring 2-byte ``bytes`` bigram to its count.
        An empty or 1-byte file yields an empty dict.
    """
    with open(path, "rb") as f:
        data = f.read()

    # 256x256 matrix indexed by (first byte, second byte).
    bytes_mat = [[0] * 256 for _ in range(256)]

    for i in range(len(data) - 1):
        bytes_mat[data[i]][data[i + 1]] += 1

    # Keep only bigrams that actually occur, keyed by the 2-byte value.
    return {
        bytes([i, j]): c
        for i in range(256)
        for j in range(256)
        if (c := bytes_mat[i][j])
    }
36 changes: 34 additions & 2 deletions rounds/3_dna/solution.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,44 @@
"""

from .baseline import find_matches as _baseline
import regex
from multiprocessing.pool import ThreadPool

def match(record, pattern_str):
    """Scan one FASTA record chunk for overlapping pattern occurrences.

    ``record`` is the text of a single record (header line plus sequence
    lines, without the leading ``>``). Blank chunks yield ``(None, [])``;
    otherwise returns the stripped header line and the start index of
    every overlapping occurrence of ``pattern_str`` in the concatenated
    sequence.
    """
    if not record.strip():
        return None, []

    # First line is the record ID; the rest is the sequence, with
    # line breaks and spaces removed.
    header, _, body = record.partition("\n")
    record_id = header.strip()
    sequence = body.replace("\n", "").replace(" ", "")

    # overlapped=True reports every start position, including overlaps.
    starts = [m.start() for m in regex.finditer(pattern_str, sequence, overlapped=True)]

    return record_id, starts


def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]:
    """Find every FASTA record whose sequence contains ``pattern``.

    Splits the file on ``>`` record markers and scans each record for
    overlapping occurrences of ``pattern`` via ``match``, fanned out
    across a thread pool.

    Returns ``[(record_id, [positions...]), ...]`` in file order.
    """
    pattern_str = pattern.decode("ascii")
    with open(fasta_path, "r") as f:
        text = f.read()

    records = text.split(">")
    args = [(record, pattern_str) for record in records]

    results = []
    # starmap preserves input order, so results stay in file order.
    # NOTE(review): regex matching is CPU-bound; threads only help here if
    # the `regex` module releases the GIL while matching — confirm by profiling.
    with ThreadPool(10) as pool:
        for record_id, match_inds in pool.starmap(match, args):
            if match_inds:
                results.append((record_id, match_inds))

    return results
Loading
Loading