Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
23d8b86
feat(gnomad): migrate gnomAD linkage to the Allele model with version…
bencap Jun 22, 2026
e631d00
feat: link VEP consequences to alleles with Ensembl-release versioning
bencap Jun 22, 2026
ac41788
feat(clinvar): migrate ClinVar annotation onto the allele model
bencap Jun 22, 2026
8c5f442
refactor(annotation): retire the HGVS and variant-translation jobs
bencap Jun 23, 2026
f532c78
feat(alleles): cross-layer allele equivalence query with as_of support
bencap Jun 23, 2026
ebbabb8
feat(annotation): add append-only AnnotationEvent log and status voca…
bencap Jun 30, 2026
4abba2f
refactor(alleles): collapse to one work-unit per allele
bencap Jun 30, 2026
975d5b1
refactor(annotation): rewrite AnnotationStatusManager to emit events
bencap Jun 30, 2026
d48d1e0
feat(gnomad): derive annotation events from link verdicts
bencap Jun 30, 2026
2be1eb0
feat(vep): derive annotation events from resolution outcomes
bencap Jun 30, 2026
e206661
feat(clingen): emit annotation events from CAR/LDH jobs
bencap Jun 30, 2026
4df95bd
feat(clinvar): emit annotation events from ClinVar job
bencap Jun 30, 2026
0941a28
feat(mapping): emit annotation events
bencap Jun 30, 2026
deaf035
feat(reverse-translation): emit annotation events
bencap Jun 30, 2026
5a1d636
feat(scripts): add pipeline tracking and event-aware annotations script
bencap Jun 30, 2026
070b622
fix(clingen): re-register existing CAIDs when force_reregister is set
bencap Jun 30, 2026
8ff66c0
fix(vep): route VEP HTTP rejections to Variant Recoder
bencap Jun 30, 2026
6ce3c4e
fix(vrs): narrow RLE→LSE coercion types to satisfy mypy
bencap Jun 30, 2026
2fee00e
fix(clinvar): supersede links on the DB clock, not naive datetime.now()
bencap Jun 30, 2026
adff332
perf(annotation): index annotation_event.score_set_id
bencap Jun 30, 2026
c87dbbf
chore(alembic): rename closure-tables migration to match its contents
bencap Jun 30, 2026
14a0f81
test(annotation): restore status-manager query edge-case coverage
bencap Jun 30, 2026
de83977
fix(hgvs): guard accession-less cis-phased split, order joined compon…
bencap Jun 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions alembic/versions/a7c4e9d2f1b8_add_vep_allele_consequences.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""add vep_allele_consequences table

Revision ID: a7c4e9d2f1b8
Revises: e5f7a9c1b3d4
Create Date: 2026-06-22

New valid-time table holding a deduplicated allele's VEP functional consequence, replacing the frozen
vep_functional_consequence/vep_access_date columns on mapped_variants for new-model writes (Step 2 of
the annotation infrastructure migration, docs/design/annotation-infrastructure-migration.md). A row is
live while valid_to is NULL; the partial unique index enforces a single live consequence per allele
(VEP's most-severe consequence is one current value, so a change supersedes rather than accumulates).
functional_consequence is nullable (reserved for a future negative cache). source_version is the
Ensembl release the consequence was resolved under (coordinated software + transcript set + vocabulary),
which version-keys the refresh skip like gnomAD's db_version; access_date is retained as a "last
confirmed" audit stamp. The VEP columns on mapped_variants are left untouched (frozen serving).
"""

import sqlalchemy as sa
from alembic import op

# Revision identifiers, used by Alembic. `revision` is this migration's id and `down_revision`
# is the parent it applies on top of (they match "Revision ID"/"Revises" in the module docstring).
revision = "a7c4e9d2f1b8"
down_revision = "e5f7a9c1b3d4"
# No branch labels or cross-revision dependencies are declared.
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Create ``vep_allele_consequences`` with its allele lookup index and live-row unique index."""
    table_name = "vep_allele_consequences"

    columns = [
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("allele_id", sa.Integer(), nullable=False),
        sa.Column("functional_consequence", sa.String(), nullable=True),
        sa.Column("source_version", sa.String(), nullable=False),
        sa.Column("access_date", sa.Date(), nullable=False),
        sa.Column("valid_from", sa.DateTime(timezone=True), nullable=False, server_default=sa.func.now()),
        sa.Column("valid_to", sa.DateTime(timezone=True), nullable=True),
    ]
    allele_fk = sa.ForeignKeyConstraint(
        ["allele_id"],
        ["alleles.id"],
        name="fk_vep_allele_consequences_allele_id",
        ondelete="RESTRICT",
    )
    op.create_table(table_name, *columns, allele_fk, sa.PrimaryKeyConstraint("id"))

    # Plain (non-unique) index over every row for an allele, live or superseded.
    op.create_index("ix_vep_allele_consequences_allele_id", table_name, ["allele_id"])

    # Partial unique index: at most one row per allele may have valid_to IS NULL, so a changed
    # VEP result has to supersede the previous live row instead of adding a second live one.
    live_rows_only = sa.text("valid_to IS NULL")
    op.create_index(
        "uq_vep_allele_consequences_live",
        table_name,
        ["allele_id"],
        unique=True,
        postgresql_where=live_rows_only,
    )


def downgrade() -> None:
    """Drop the ``vep_allele_consequences`` indexes, then the table itself."""
    table_name = "vep_allele_consequences"
    # Reverse of the creation order in upgrade(): unique live-row index first, lookup index second.
    for index_name in ("uq_vep_allele_consequences_live", "ix_vep_allele_consequences_allele_id"):
        op.drop_index(index_name, table_name=table_name)
    op.drop_table(table_name)
79 changes: 79 additions & 0 deletions alembic/versions/a7e1c4f9b3d2_add_annotation_event.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
"""add annotation_event log

Revision ID: a7e1c4f9b3d2
Revises: d4e5f6a7b8c9
Create Date: 2026-06-25 16:00:00.000000

"""

import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

from alembic import op

# Revision identifiers, used by Alembic. `revision` is this migration's id and `down_revision`
# is the parent it applies on top of (they match "Revision ID"/"Revises" in the module docstring).
revision = "a7e1c4f9b3d2"
down_revision = "d4e5f6a7b8c9"
branch_labels = None
depends_on = None

# SQL IN-list fragments (comma-separated, single-quoted literals) interpolated into the
# ck_annotation_event_subject CHECK constraint built in upgrade().
# Annotation types whose rows must set variant_id and leave allele_id NULL.
_VARIANT_SUBJECT_TYPES = "'vrs_mapping', 'cross_level_translation', 'variant_translation', 'ldh_submission'"
# Annotation types whose rows must set allele_id and leave variant_id NULL.
_ALLELE_SUBJECT_TYPES = (
"'clingen_allele_id', 'gnomad_allele_frequency', 'vep_functional_consequence', 'clinvar_control', 'mapped_hgvs'"
)


def upgrade():
    """Create the ``annotation_event`` table and its lookup indexes.

    Each row references either a variant or an allele, never both; the
    ``ck_annotation_event_subject`` CHECK constraint ties that choice to the row's
    ``annotation_type``.
    """
    table_name = "annotation_event"

    # Variant-subject types require variant_id only; allele-subject types require allele_id only.
    subject_check = (
        f"(annotation_type IN ({_VARIANT_SUBJECT_TYPES}) AND variant_id IS NOT NULL AND allele_id IS NULL) "
        f"OR (annotation_type IN ({_ALLELE_SUBJECT_TYPES}) AND allele_id IS NOT NULL AND variant_id IS NULL)"
    )

    op.create_table(
        table_name,
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("annotation_type", sa.String(length=50), nullable=False),
        sa.Column("variant_id", sa.Integer(), nullable=True),
        sa.Column("allele_id", sa.Integer(), nullable=True),
        sa.Column("disposition", sa.String(length=50), nullable=False),
        sa.Column("reason", sa.String(length=50), nullable=False),
        sa.Column("source_version", sa.String(length=50), nullable=True),
        sa.Column("metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
        sa.Column("job_run_id", sa.Integer(), nullable=True),
        sa.Column("score_set_id", sa.Integer(), nullable=True),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
        sa.CheckConstraint(subject_check, name="ck_annotation_event_subject"),
        # Subject rows cannot be deleted while events reference them (RESTRICT); deleting a
        # job run or score set only nulls the reference on the event (SET NULL).
        sa.ForeignKeyConstraint(["variant_id"], ["variants.id"], ondelete="RESTRICT"),
        sa.ForeignKeyConstraint(["allele_id"], ["alleles.id"], ondelete="RESTRICT"),
        sa.ForeignKeyConstraint(["job_run_id"], ["job_runs.id"], ondelete="SET NULL"),
        sa.ForeignKeyConstraint(["score_set_id"], ["scoresets.id"], ondelete="SET NULL"),
        sa.PrimaryKeyConstraint("id"),
    )

    # (index name, indexed columns), created in the listed order. The two "*_type_id" indexes
    # end in ``id DESC`` so the newest event per (subject, annotation_type) sorts first.
    index_specs = (
        ("ix_annotation_event_allele_type_id", ["allele_id", "annotation_type", sa.text("id DESC")]),
        ("ix_annotation_event_variant_type_id", ["variant_id", "annotation_type", sa.text("id DESC")]),
        ("ix_annotation_event_allele_type_version", ["allele_id", "annotation_type", "source_version"]),
        ("ix_annotation_event_job_run_id", ["job_run_id"]),
    )
    for index_name, index_columns in index_specs:
        op.create_index(index_name, table_name, index_columns, unique=False)


def downgrade():
    """Drop the ``annotation_event`` indexes in reverse creation order, then the table."""
    table_name = "annotation_event"
    indexes_newest_first = (
        "ix_annotation_event_job_run_id",
        "ix_annotation_event_allele_type_version",
        "ix_annotation_event_variant_type_id",
        "ix_annotation_event_allele_type_id",
    )
    for index_name in indexes_newest_first:
        op.drop_index(index_name, table_name=table_name)
    op.drop_table(table_name)
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""rename clinical_controls to clinvar_controls

Revision ID: b2c3d4e5f6a7
Revises: a7c4e9d2f1b8
Create Date: 2026-06-22

Renames the clinical_controls entity table to clinvar_controls, and renames the unique
constraint to match. The frozen association table (mapped_variants_clinical_controls)
and its FK to the renamed table are left structurally intact — PostgreSQL updates the FK
target automatically on table rename. The Python model is renamed ClinicalControl →
ClinvarControl in the same changeset (no data migration).
"""

from alembic import op

# Revision identifiers, used by Alembic. `revision` is this migration's id and `down_revision`
# is the parent it applies on top of (they match "Revision ID"/"Revises" in the module docstring).
revision = "b2c3d4e5f6a7"
down_revision = "a7c4e9d2f1b8"
# No branch labels or cross-revision dependencies are declared.
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Rename ``clinical_controls`` to ``clinvar_controls`` and rename its unique constraint to match."""
    op.rename_table("clinical_controls", "clinvar_controls")

    # Renaming a table leaves constraint names unchanged in PostgreSQL, so the unique constraint
    # is renamed by hand; the job's on_conflict_do_update(constraint=...) looks it up by name.
    rename_constraint_sql = (
        "ALTER TABLE clinvar_controls "
        "RENAME CONSTRAINT uq_clinical_controls_db_name_identifier_version "
        "TO uq_clinvar_controls_db_name_identifier_version"
    )
    op.execute(rename_constraint_sql)


def downgrade() -> None:
    """Restore the original constraint name, then rename the table back to ``clinical_controls``."""
    # The constraint is renamed first, while the table still carries its new name.
    restore_constraint_sql = (
        "ALTER TABLE clinvar_controls "
        "RENAME CONSTRAINT uq_clinvar_controls_db_name_identifier_version "
        "TO uq_clinical_controls_db_name_identifier_version"
    )
    op.execute(restore_constraint_sql)
    op.rename_table("clinvar_controls", "clinical_controls")
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""add v_current_annotation_events view

Revision ID: b8f2c5a1d3e4
Revises: a7e1c4f9b3d2
Create Date: 2026-06-26

Creates the v_current_annotation_events view: the latest AnnotationEvent per (subject, annotation_type),
with ClinVar partitioned additionally by source_version (multi-live, one current row per release).
Replaces the per-variant v_variant_annotations as the current-state projection over the new
allele-model annotation log. Intended for operator queries, BI, and the annotation CLI scripts.
"""

from alembic import op

from mavedb.db.view import CreateView, DropView
from mavedb.models.annotation_event_view import definition, signature

# Revision identifiers, used by Alembic. `revision` is this migration's id and `down_revision`
# is the parent it applies on top of (they match "Revision ID"/"Revises" in the module docstring).
revision = "b8f2c5a1d3e4"
down_revision = "a7e1c4f9b3d2"
# No branch labels or cross-revision dependencies are declared.
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Create the (non-materialized) view described by the imported ``signature``/``definition``."""
    # NOTE(review): the view SQL is imported from the live model module, so later edits to that
    # module change what this historical migration creates — confirm that is intended.
    create_view = CreateView(signature, definition, materialized=False)
    op.execute(create_view)


def downgrade() -> None:
    """Drop the (non-materialized) view identified by the imported ``signature``."""
    drop_view = DropView(signature, materialized=False)
    op.execute(drop_view)
73 changes: 73 additions & 0 deletions alembic/versions/c3d4e5f6a7b8_add_clinvar_allele_links.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
"""add clinvar_allele_links table

Revision ID: c3d4e5f6a7b8
Revises: b2c3d4e5f6a7
Create Date: 2026-06-22

New valid-time link table connecting deduplicated alleles to ClinvarControl rows, replacing
the frozen mapped_variants_clinical_controls association for new-model writes.

ClinVar's link shape is deliberately multi-live: the partial unique index is
(allele_id, clinvar_control_id) WHERE valid_to IS NULL, so an allele accumulates one live
link per ClinVar release rather than superseding as in gnomAD/VEP. Each ClinVar release is a
distinct ClinvarControl row, so different releases stack as independent live links. A link is
retired (valid_to closed) only if ClinVar later removes the variant from a release; a re-run
would then find no data for that release and retire the corresponding link. Archival data
never changes, so this path is theoretical.

The existing mapped_variants_clinical_controls association table is left untouched (frozen
for serving existing data).
"""

import sqlalchemy as sa

from alembic import op

# Revision identifiers, used by Alembic. `revision` is this migration's id and `down_revision`
# is the parent it applies on top of (they match "Revision ID"/"Revises" in the module docstring).
revision = "c3d4e5f6a7b8"
down_revision = "b2c3d4e5f6a7"
# No branch labels or cross-revision dependencies are declared.
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Create ``clinvar_allele_links`` with per-column lookup indexes and a live-link unique index."""
    table_name = "clinvar_allele_links"

    columns = [
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("allele_id", sa.Integer(), nullable=False),
        sa.Column("clinvar_control_id", sa.Integer(), nullable=False),
        sa.Column("valid_from", sa.DateTime(timezone=True), nullable=False, server_default=sa.func.now()),
        sa.Column("valid_to", sa.DateTime(timezone=True), nullable=True),
    ]
    allele_fk = sa.ForeignKeyConstraint(
        ["allele_id"],
        ["alleles.id"],
        name="fk_clinvar_allele_links_allele_id",
        ondelete="RESTRICT",
    )
    control_fk = sa.ForeignKeyConstraint(
        ["clinvar_control_id"],
        ["clinvar_controls.id"],
        name="fk_clinvar_allele_links_clinvar_control_id",
        ondelete="RESTRICT",
    )
    op.create_table(table_name, *columns, allele_fk, control_fk, sa.PrimaryKeyConstraint("id"))

    # One plain lookup index per foreign-key column.
    for fk_column in ("allele_id", "clinvar_control_id"):
        op.create_index(f"ix_clinvar_allele_links_{fk_column}", table_name, [fk_column])

    # Multi-live: uniqueness covers the (allele, control) pair among rows with valid_to IS NULL,
    # so one allele may hold a live link to each of several ClinvarControl rows at once. Rows
    # with valid_to set fall outside the index and are kept for point-in-time queries.
    live_rows_only = sa.text("valid_to IS NULL")
    op.create_index(
        "uq_clinvar_allele_links_live",
        table_name,
        ["allele_id", "clinvar_control_id"],
        unique=True,
        postgresql_where=live_rows_only,
    )


def downgrade() -> None:
    """Drop the ``clinvar_allele_links`` indexes in reverse creation order, then the table."""
    table_name = "clinvar_allele_links"
    indexes_newest_first = (
        "uq_clinvar_allele_links_live",
        "ix_clinvar_allele_links_clinvar_control_id",
        "ix_clinvar_allele_links_allele_id",
    )
    for index_name in indexes_newest_first:
        op.drop_index(index_name, table_name=table_name)
    op.drop_table(table_name)
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""index annotation_event.score_set_id

Revision ID: c9b2a4f8e1d3
Revises: b8f2c5a1d3e4
Create Date: 2026-06-30

Adds the missing foreign-key index on annotation_event.score_set_id. Every other FK on this table
is indexed; score_set_id was not. The index backs the ON DELETE SET NULL cascade fired when a score
set is deleted (an unindexed FK forces a sequential scan of the event log per deleted score set) and
any operator/BI query that filters the log by score set.
"""

from alembic import op

# Revision identifiers, used by Alembic. `revision` is this migration's id and `down_revision`
# is the parent it applies on top of (they match "Revision ID"/"Revises" in the module docstring).
revision = "c9b2a4f8e1d3"
down_revision = "b8f2c5a1d3e4"
# No branch labels or cross-revision dependencies are declared.
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Add a non-unique index on ``annotation_event.score_set_id``."""
    indexed_columns = ["score_set_id"]
    op.create_index(
        "ix_annotation_event_score_set_id",
        "annotation_event",
        indexed_columns,
        unique=False,
    )


def downgrade() -> None:
    """Remove the ``annotation_event.score_set_id`` index added by upgrade()."""
    index_name = "ix_annotation_event_score_set_id"
    op.drop_index(index_name, table_name="annotation_event")
29 changes: 29 additions & 0 deletions alembic/versions/d4e5f6a7b8c9_add_clinvar_variation_id.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""add clinvar_variation_id to clinvar_controls

Revision ID: d4e5f6a7b8c9
Revises: c3d4e5f6a7b8
Create Date: 2026-06-22

Additive, non-breaking column for ClinVar's canonical public identifier (VariationID), captured forward
from the variant_summary TSV. db_identifier continues to hold the AlleleID (the allele-level handle used
for gnomAD cross-references); this carries the VariationID beside it for eventual external ClinVar links
(clinvar/variation/{id}). Nullable and not yet served — the dedicated clinvar_variants remodel (explicit
fields replacing the generic db_* shape, the serving/UI cutover, and backfill of existing rows) is
deferred.
"""

import sqlalchemy as sa
from alembic import op

# Revision identifiers, used by Alembic. `revision` is this migration's id and `down_revision`
# is the parent it applies on top of (they match "Revision ID"/"Revises" in the module docstring).
revision = "d4e5f6a7b8c9"
down_revision = "c3d4e5f6a7b8"
# No branch labels or cross-revision dependencies are declared.
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Add the nullable string column ``clinvar_variation_id`` to ``clinvar_controls``."""
    # Nullable with no default, so no value is supplied for existing rows.
    variation_id_column = sa.Column("clinvar_variation_id", sa.String(), nullable=True)
    op.add_column("clinvar_controls", variation_id_column)


def downgrade() -> None:
    """Drop ``clinvar_variation_id`` from ``clinvar_controls``; any stored values are discarded."""
    table_name, column_name = "clinvar_controls", "clinvar_variation_id"
    op.drop_column(table_name, column_name)
Loading
Loading