From 7b8a9e80f3251f98de2b79a71769297fe5c4375f Mon Sep 17 00:00:00 2001 From: Aldo Ortega Date: Wed, 13 May 2026 09:32:48 -0700 Subject: [PATCH 1/6] Downgraded python --- .python-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.python-version b/.python-version index d5629d4..6324d40 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.15t +3.14 From bf2bf4b68da8d5a69cd5963fb63890bfdcd9ae6a Mon Sep 17 00:00:00 2001 From: Aldo Ortega Date: Wed, 13 May 2026 09:52:30 -0700 Subject: [PATCH 2/6] First change --- .gitignore | 3 +++ rounds/1_histogram/solution.py | 17 ++++++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 32c32e0..8ec97df 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,6 @@ __pycache__/ # CodSpeed .codspeed/ + +# Mine +codes diff --git a/rounds/1_histogram/solution.py b/rounds/1_histogram/solution.py index dffbee5..c36dd20 100644 --- a/rounds/1_histogram/solution.py +++ b/rounds/1_histogram/solution.py @@ -8,7 +8,18 @@ def compute_histogram(path: str) -> dict[bytes, int]: """Frequency of every 2-byte bigram in the file at ``path``.""" - # TODO: remove this delegation and write your own implementation here. - from .baseline import compute_histogram as _baseline + # Step 1: read the whole file into memory as a single bytes object. + with open(path, "rb") as f: + data = f.read() - return _baseline(path) + # Step 2: slide a 2-byte window across the buffer. For ``b"ABCD"`` the + # iterations produce ``b"AB"``, ``b"BC"``, then ``b"CD"``. For each window, + # bump the matching bucket in a ``dict`` keyed by the bigram itself. + counts: dict[bytes, int] = {} + for i in range(len(data) - 1): + bigram = data[i : i + 2] + if bigram in counts: + counts[bigram] += 1 + else: + counts[bigram] = 1 + return counts From 8b3c68ecd15117b2e6e3f739803e77052b5d18be Mon Sep 17 00:00:00 2001 From: Aldo Ortega Date: Wed, 13 May 2026 11:41:15 -0700 Subject: [PATCH 3/6] test 1 --- rounds/1_histogram/solution.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/rounds/1_histogram/solution.py b/rounds/1_histogram/solution.py index c36dd20..e1e67b0 100644 --- a/rounds/1_histogram/solution.py +++ b/rounds/1_histogram/solution.py @@ -4,6 +4,7 @@ passes out of the box. Replace the body of ``compute_histogram`` with your own faster implementation. """ +import numpy as np def compute_histogram(path: str) -> dict[bytes, int]: @@ -15,11 +16,16 @@ def compute_histogram(path: str) -> dict[bytes, int]: # Step 2: slide a 2-byte window across the buffer. For ``b"ABCD"`` the # iterations produce ``b"AB"``, ``b"BC"``, then ``b"CD"``. For each window, # bump the matching bucket in a ``dict`` keyed by the bigram itself. - counts: dict[bytes, int] = {} + counts= [[0] * 256 for _ in range(256)] + for i in range(len(data) - 1): - bigram = data[i : i + 2] - if bigram in counts: - counts[bigram] += 1 - else: - counts[bigram] = 1 - return counts + a, b = data[i], data[i + 1] + counts[a][b] += 1 + + result = {} + for i, row in enumerate(counts): + for j, count in enumerate(row): + if count > 0: + bigram = bytes([i, j]) + result[bigram] = count + return result \ No newline at end of file From 12310e9aa2fdb46262a2b7029a45312f2f8a2100 Mon Sep 17 00:00:00 2001 From: Aldo Ortega Date: Wed, 13 May 2026 12:09:41 -0700 Subject: [PATCH 4/6] Just bytes --- rounds/3_dna/solution.py | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/rounds/3_dna/solution.py b/rounds/3_dna/solution.py index 8b917da..7148553 100644 --- a/rounds/3_dna/solution.py +++ b/rounds/3_dna/solution.py @@ -6,6 +6,8 @@ """ from .baseline import find_matches as _baseline +from threading import Thread +import numpy as np def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]: @@ -13,5 +15,38 @@ def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]] Returns ``[(record_id, [positions...]), ...]`` in file order. """ - # TODO: remove this delegation and write your own implementation here. - return _baseline(fasta_path, pattern) + # Step 1: read the whole FASTA file as text and decode the pattern so the + # search below can use a single ``str`` API. + pattern_str = pattern.decode("ascii") + with open(fasta_path, "rb") as f: + text = f.read() + matches: list[tuple[str, list[int]]] = [] + + # Step 2: split the file on '>' to peel off one record at a time. The + # first element is the chunk before any header (empty for well-formed + # files) and is skipped by the ``.strip()`` guard below. + for record in text.split(b">"): + if not record.strip(): + continue + + # Step 3: a record looks like ``"\n\n\n..."``. + # The id is the first line; the remaining lines are joined back into a + # single contiguous sequence string. + lines = record.split(b"\n") + record_id = lines[0].strip().decode("ascii") + sequence = b"".join(lines[1:]).replace(b" ", b"").decode("ascii") + + # Step 4: walk the sequence with ``str.find()``, advancing one byte + # past each hit so overlapping matches are reported too. + positions: list[int] = [] + start = 0 + while True: + pos = sequence.find(pattern_str, start) + if pos == -1: + break + positions.append(pos) + start = pos + 1 + + if positions: + matches.append((record_id, positions)) + return matches From 4a244f83c08c8edaff45411728e8b85d48e9ce10 Mon Sep 17 00:00:00 2001 From: Aldo Ortega Date: Wed, 13 May 2026 12:21:32 -0700 Subject: [PATCH 5/6] using threads --- rounds/3_dna/solution.py | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/rounds/3_dna/solution.py b/rounds/3_dna/solution.py index 7148553..8d845a8 100644 --- a/rounds/3_dna/solution.py +++ b/rounds/3_dna/solution.py @@ -25,6 +25,7 @@ def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]] # Step 2: split the file on '>' to peel off one record at a time. The # first element is the chunk before any header (empty for well-formed # files) and is skipped by the ``.strip()`` guard below. + sequences = [] for record in text.split(b">"): if not record.strip(): continue @@ -35,18 +36,26 @@ def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]] lines = record.split(b"\n") record_id = lines[0].strip().decode("ascii") sequence = b"".join(lines[1:]).replace(b" ", b"").decode("ascii") - - # Step 4: walk the sequence with ``str.find()``, advancing one byte - # past each hit so overlapping matches are reported too. - positions: list[int] = [] - start = 0 - while True: - pos = sequence.find(pattern_str, start) - if pos == -1: - break - positions.append(pos) - start = pos + 1 - - if positions: - matches.append((record_id, positions)) + sequences.append((record_id, sequence)) + + threads = [] + for record_id, sequence in sequences: + thread = Thread(target=match_record, args=(record_id, sequence, pattern_str, matches)) + thread.start() + threads.append(thread) + for thread in threads: + thread.join() return matches + +def match_record(record_id, sequence, pattern_str, matches): + positions: list[int] = [] + start = 0 + while True: + pos = sequence.find(pattern_str, start) + if pos == -1: + break + positions.append(pos) + start = pos + 1 + + if positions: + matches.append((record_id, positions)) \ No newline at end of file From 6a13e9486e90b91f0d56ae3dced35e2dd3cfd317 Mon Sep 17 00:00:00 2001 From: Aldo Ortega Date: Wed, 13 May 2026 12:30:24 -0700 Subject: [PATCH 6/6] rerun --- rounds/3_dna/solution.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rounds/3_dna/solution.py b/rounds/3_dna/solution.py index 8d845a8..f7c4025 100644 --- a/rounds/3_dna/solution.py +++ b/rounds/3_dna/solution.py @@ -45,6 +45,7 @@ def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]] threads.append(thread) for thread in threads: thread.join() + return matches def match_record(record_id, sequence, pattern_str, matches):