From 2e7a58984db7a595197eabfab379579cf9c3b231 Mon Sep 17 00:00:00 2001 From: stephantul Date: Wed, 29 Apr 2026 09:55:10 +0200 Subject: [PATCH 1/2] chore: d212 -> d213, fix linting and mypy error --- .pre-commit-config.yaml | 9 ++++----- benchmarks/baselines/coderankembed.py | 3 ++- benchmarks/plot.py | 3 +-- benchmarks/run_benchmark.py | 1 - benchmarks/speed_benchmark.py | 2 +- pyproject.toml | 2 +- src/semble/index/dense.py | 1 - 7 files changed, 9 insertions(+), 12 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2434b50..fb72ab4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,3 @@ - # See https://pre-commit.com for more information # See https://pre-commit.com/hooks.html for more hooks repos: @@ -12,7 +11,7 @@ repos: - id: end-of-file-fixer description: Makes sure files end in a newline and only a newline. - id: check-added-large-files - args: ['--maxkb=5000'] + args: ["--maxkb=5000"] description: Prevent giant files from being committed. - id: check-case-conflict description: Check for files with names that would conflict on case-insensitive filesystems like MacOS/Windows. @@ -21,10 +20,10 @@ repos: hooks: - id: pydoclint - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.9.0 + rev: v0.15.12 hooks: - - id: ruff - args: [ --fix ] + - id: ruff-check + args: [--fix] - id: ruff-format - repo: local hooks: diff --git a/benchmarks/baselines/coderankembed.py b/benchmarks/baselines/coderankembed.py index ee4035a..7976b3b 100644 --- a/benchmarks/baselines/coderankembed.py +++ b/benchmarks/baselines/coderankembed.py @@ -3,6 +3,7 @@ import sys import time from collections import defaultdict +from collections.abc import Sequence from dataclasses import asdict, dataclass, field from pathlib import Path @@ -39,7 +40,7 @@ def __init__(self, model: SentenceTransformer, max_seq_length: int = 512) -> Non self._model = model self._model.max_seq_length = max_seq_length - def encode(self, texts: list[str]) -> np.ndarray: + def encode(self, texts: Sequence[str]) -> np.ndarray: """Encode texts with query or document prompt based on batch size.""" if len(texts) == 1: return self._model.encode(texts, prompt_name="query", batch_size=1) # type: ignore[return-value] diff --git a/benchmarks/plot.py b/benchmarks/plot.py index d4d1f20..db37f55 100644 --- a/benchmarks/plot.py +++ b/benchmarks/plot.py @@ -110,8 +110,7 @@ def _format_ms(v: float, _: object) -> str: def _make_plot(out_path: Path, *, warm: bool = False) -> None: - """ - Generate a speed-vs-quality scatter plot. + """Generate a speed-vs-quality scatter plot. :param out_path: Destination PNG path. :param warm: If True, use per-query latency (index pre-built). If False, use index + query latency. diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index 34148fc..73c2442 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -1,5 +1,4 @@ import argparse -import json import sys import time from collections import defaultdict diff --git a/benchmarks/speed_benchmark.py b/benchmarks/speed_benchmark.py index 7bdc696..ac81715 100644 --- a/benchmarks/speed_benchmark.py +++ b/benchmarks/speed_benchmark.py @@ -194,7 +194,7 @@ def _bench_colgrep(spec: RepoSpec, tasks: list[Task]) -> tuple[float, tuple[floa if proc.returncode != 0: print(f" WARNING: colgrep init failed: {proc.stderr.strip()}", file=sys.stderr) if "(0 files)" in proc.stdout or "(0 files)" in proc.stderr: - print(f" SKIP: colgrep indexed 0 files (unsupported language?)", file=sys.stderr) + print(" SKIP: colgrep indexed 0 files (unsupported language?)", file=sys.stderr) return None latencies: list[float] = [] code_only = spec.language != "bash" diff --git a/pyproject.toml b/pyproject.toml index 00d248c..ba6f0fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,7 +110,7 @@ ignore = [ # Allow Any type "ANN401", # Pydocstyle ignores - "D100", "D101", "D104", "D203", "D212", "D401", + "D100", "D101", "D104", "D203", "D213", "D401", ] [tool.pydoclint] diff --git a/src/semble/index/dense.py b/src/semble/index/dense.py index 0aa7992..878656e 100644 --- a/src/semble/index/dense.py +++ b/src/semble/index/dense.py @@ -2,7 +2,6 @@ import numpy.typing as npt from huggingface_hub.utils.tqdm import disable_progress_bars from model2vec import StaticModel -from numpy import typing as npt from vicinity.backends.basic import CosineBasicBackend from vicinity.datatypes import QueryResult from vicinity.utils import normalize From 50f1682d5ba4ce43c258c15313421722e04f5faf Mon Sep 17 00:00:00 2001 From: stephantul Date: Wed, 29 Apr 2026 09:57:05 +0200 Subject: [PATCH 2/2] fix results missing --- benchmarks/baselines/ablations.py | 2 +- benchmarks/baselines/coderankembed.py | 2 +- benchmarks/run_benchmark.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmarks/baselines/ablations.py b/benchmarks/baselines/ablations.py index 3969b28..3fa973d 100644 --- a/benchmarks/baselines/ablations.py +++ b/benchmarks/baselines/ablations.py @@ -71,7 +71,7 @@ def _evaluate( for task in tasks: query_latencies: list[float] = [] - results: list[SearchResult] + results: list[SearchResult] = [] for _ in range(_LATENCY_RUNS): started = time.perf_counter() results = index.search(task.query, top_k=_TOP_K, mode=mode, alpha=alpha) diff --git a/benchmarks/baselines/coderankembed.py b/benchmarks/baselines/coderankembed.py index 7976b3b..7ed91b7 100644 --- a/benchmarks/baselines/coderankembed.py +++ b/benchmarks/baselines/coderankembed.py @@ -78,7 +78,7 @@ def _evaluate( for task in tasks: query_latencies: list[float] = [] - results: list[SearchResult] + results: list[SearchResult] = [] for _ in range(_LATENCY_RUNS): started = time.perf_counter() results = index.search(task.query, top_k=_TOP_K, mode=mode) diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index 73c2442..2ca4bfb 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -53,7 +53,7 @@ def _evaluate( for task in tasks: query_latencies: list[float] = [] - results: list[SearchResult] + results: list[SearchResult] = [] for _ in range(_LATENCY_RUNS): started = time.perf_counter() results = index.search(task.query, top_k=_DIRECT_TOP_K)