Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
Expand All @@ -12,7 +11,7 @@ repos:
- id: end-of-file-fixer
description: Makes sure files end in a newline and only a newline.
- id: check-added-large-files
args: ['--maxkb=5000']
args: ["--maxkb=5000"]
description: Prevent giant files from being committed.
- id: check-case-conflict
description: Check for files with names that would conflict on case-insensitive filesystems like MacOS/Windows.
Expand All @@ -21,10 +20,10 @@ repos:
hooks:
- id: pydoclint
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.0
rev: v0.15.12
hooks:
- id: ruff
args: [ --fix ]
- id: ruff-check
args: [--fix]
- id: ruff-format
- repo: local
hooks:
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/baselines/ablations.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def _evaluate(

for task in tasks:
query_latencies: list[float] = []
results: list[SearchResult]
results: list[SearchResult] = []
for _ in range(_LATENCY_RUNS):
started = time.perf_counter()
results = index.search(task.query, top_k=_TOP_K, mode=mode, alpha=alpha)
Expand Down
5 changes: 3 additions & 2 deletions benchmarks/baselines/coderankembed.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import sys
import time
from collections import defaultdict
from collections.abc import Sequence
from dataclasses import asdict, dataclass, field
from pathlib import Path

Expand Down Expand Up @@ -39,7 +40,7 @@ def __init__(self, model: SentenceTransformer, max_seq_length: int = 512) -> Non
self._model = model
self._model.max_seq_length = max_seq_length

def encode(self, texts: list[str]) -> np.ndarray:
def encode(self, texts: Sequence[str]) -> np.ndarray:
"""Encode texts with query or document prompt based on batch size."""
if len(texts) == 1:
return self._model.encode(texts, prompt_name="query", batch_size=1) # type: ignore[return-value]
Expand Down Expand Up @@ -77,7 +78,7 @@ def _evaluate(

for task in tasks:
query_latencies: list[float] = []
results: list[SearchResult]
results: list[SearchResult] = []
for _ in range(_LATENCY_RUNS):
started = time.perf_counter()
results = index.search(task.query, top_k=_TOP_K, mode=mode)
Expand Down
3 changes: 1 addition & 2 deletions benchmarks/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,7 @@ def _format_ms(v: float, _: object) -> str:


def _make_plot(out_path: Path, *, warm: bool = False) -> None:
"""
Generate a speed-vs-quality scatter plot.
"""Generate a speed-vs-quality scatter plot.
:param out_path: Destination PNG path.
:param warm: If True, use per-query latency (index pre-built). If False, use index + query latency.
Expand Down
3 changes: 1 addition & 2 deletions benchmarks/run_benchmark.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import argparse
import json
import sys
import time
from collections import defaultdict
Expand Down Expand Up @@ -54,7 +53,7 @@ def _evaluate(

for task in tasks:
query_latencies: list[float] = []
results: list[SearchResult]
results: list[SearchResult] = []
for _ in range(_LATENCY_RUNS):
started = time.perf_counter()
results = index.search(task.query, top_k=_DIRECT_TOP_K)
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/speed_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def _bench_colgrep(spec: RepoSpec, tasks: list[Task]) -> tuple[float, tuple[floa
if proc.returncode != 0:
print(f" WARNING: colgrep init failed: {proc.stderr.strip()}", file=sys.stderr)
if "(0 files)" in proc.stdout or "(0 files)" in proc.stderr:
print(f" SKIP: colgrep indexed 0 files (unsupported language?)", file=sys.stderr)
print(" SKIP: colgrep indexed 0 files (unsupported language?)", file=sys.stderr)
return None
latencies: list[float] = []
code_only = spec.language != "bash"
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ ignore = [
# Allow Any type
"ANN401",
# Pydocstyle ignores
"D100", "D101", "D104", "D203", "D212", "D401",
"D100", "D101", "D104", "D203", "D213", "D401",
]

[tool.pydoclint]
Expand Down
1 change: 0 additions & 1 deletion src/semble/index/dense.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import numpy.typing as npt
from huggingface_hub.utils.tqdm import disable_progress_bars
from model2vec import StaticModel
from numpy import typing as npt
from vicinity.backends.basic import CosineBasicBackend
from vicinity.datatypes import QueryResult
from vicinity.utils import normalize
Expand Down
Loading