Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,27 @@ The reference implementation is meant to be customized for your facility's IRI i
### Customizing the business logic for your facility
The IRI API handles the "boilerplate" of setting up the REST API. It delegates to the per-facility business logic via interface definitions. These interfaces are implemented as abstract classes, one per API group (status, account, etc.). Each router directory defines a FacilityAdapter class (e.g. [the status adapter](app/routers/status/facility_adapter.py)) that is expected to be implemented by the facility that is exposing an IRI API instance.

## Forwarded Project Header For Compute Requests

Compute submission and update requests support a trusted forwarded header named `X-IRI-Facility-Project`.

This header is intended for deployments where an upstream trusted component has already resolved the caller's project/account into the facility-native value required by the downstream scheduler or execution system.

When `X-IRI-Facility-Project` is present and valid:

- IRI treats that header value as the effective project/account for the compute request.
- The downstream compute adapter receives the request as if that value were the facility-native account to use for job submission or update.
- Implementations may surface that effective value in returned job metadata, scheduler requests, labels, annotations, or similar downstream submission context.

For compute submit/update requests, the effective project/account must be specified in exactly one place:

- `job_spec.attributes.account`, or
- `X-IRI-Facility-Project`

If both are provided, or if neither is provided, IRI returns `400 Bad Request`.
This behavior is specific to compute submission/update handling; read-only endpoints are unchanged.

The specific facility adapter implementations can be specified via the `IRI_API_ADAPTER_*` environment variables. For example, the adapter for the `status` API would be given by setting `IRI_API_ADAPTER_status` to the full Python module and class implementing `app.routers.status.facility_adapter.FacilityAdapter` (e.g. `IRI_API_ADAPTER_status=myfacility.MyFacilityStatusAdapter`).

As a default implementation, this project supplies the [demo adapter](app/demo_adapter.py) which implements every facility adapter with fake data.
Expand Down
9 changes: 7 additions & 2 deletions app/demo_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from .routers.status import models as status_models
from .routers.task import facility_adapter as task_adapter
from .routers.task import models as task_models
from .request_context import get_iri_facility_project
from .types.models import Capability
from .types.user import User
from .types.scalars import AllocationUnit
Expand Down Expand Up @@ -542,14 +543,16 @@ async def submit_job(
user: User,
job_spec: compute_models.JobSpec,
) -> compute_models.Job:
facility_project = get_iri_facility_project()
account = facility_project or (job_spec.attributes.account if job_spec.attributes else None)
return compute_models.Job(
id="job_123",
status=compute_models.JobStatus(
state=compute_models.JobState.NEW,
time=utc_timestamp(),
message="job submitted",
exit_code=0,
meta_data={"account": "account1"},
meta_data={"account": account},
),
)

Expand All @@ -560,14 +563,16 @@ async def update_job(
job_spec: compute_models.JobSpec,
job_id: str,
) -> compute_models.Job:
facility_project = get_iri_facility_project()
account = facility_project or (job_spec.attributes.account if job_spec.attributes else None)
return compute_models.Job(
id=job_id,
status=compute_models.JobStatus(
state=compute_models.JobState.ACTIVE,
time=utc_timestamp(),
message="job updated",
exit_code=0,
meta_data={"account": "account1"},
meta_data={"account": account},
),
)

Expand Down
8 changes: 5 additions & 3 deletions app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from . import config
from .apilogger import configure_logging
from .request_context import set_api_url_base, _api_url_base
from .request_context import _api_url_base, _iri_facility_project, set_api_url_base

from app.routers.error_handlers import install_error_handlers
from app.routers.facility import facility
Expand Down Expand Up @@ -58,12 +58,14 @@

class _ExternalRequestContextMiddleware(BaseHTTPMiddleware):
    """Scope the API URL base and forwarded facility project to a single request.

    Both context variables are cleared before the request is handled and
    reset to their previous values once the response has been produced.
    """

    async def dispatch(self, request: Request, call_next):
        """Populate the request context, run the downstream handler, then restore it.

        Args:
            request: The incoming request whose headers feed the context.
            call_next: Callable that invokes the rest of the middleware/route chain.

        Returns:
            Whatever ``call_next(request)`` returns.
        """
        # One token per context variable; each is used exactly once in ``finally``.
        url_token = _api_url_base.set(None)
        facility_project_token = _iri_facility_project.set(None)
        try:
            # set_api_url_base reads the request headers; it also records the
            # forwarded ``x-iri-facility-project`` header value.
            set_api_url_base(request)
            return await call_next(request)
        finally:
            _api_url_base.reset(url_token)
            _iri_facility_project.reset(facility_project_token)


APP.add_middleware(_ExternalRequestContextMiddleware)
Expand Down
8 changes: 8 additions & 0 deletions app/request_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from . import config

_api_url_base: ContextVar[str | None] = ContextVar("_api_url_base", default=None)
_iri_facility_project: ContextVar[str | None] = ContextVar("_iri_facility_project", default=None)


def _first_header_value(value: str | None) -> str:
Expand All @@ -22,6 +23,8 @@ def set_api_url_base(request: Request) -> None:
api_url = config.API_URL.strip("/")
if host:
_api_url_base.set(f"{proto}://{host}{prefix}{api_prefix}/{api_url}")
facility_project = _first_header_value(request.headers.get("x-iri-facility-project"))
_iri_facility_project.set(facility_project or None)


def get_url_prefix() -> str:
Expand All @@ -30,3 +33,8 @@ def get_url_prefix() -> str:
if value:
return value
return f"{config.API_URL_ROOT}{config.API_PREFIX}{config.API_URL}"


def get_iri_facility_project() -> str | None:
    """Return the facility-native project/account identifier forwarded by RIG.

    The value is read from the ``_iri_facility_project`` context variable and
    is ``None`` when nothing has been stored in the current context.
    """
    forwarded_project = _iri_facility_project.get()
    return forwarded_project
18 changes: 15 additions & 3 deletions app/routers/compute/compute.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Compute resource API router"""

from fastapi import Depends, HTTPException, Query, Request, status
from fastapi import Depends, Query, Request, status

from ...types.http import forbidExtraQueryParams
from ...types.scalars import StrictHTTPBool
Expand All @@ -16,8 +16,6 @@
prefix="/compute",
tags=["compute"],
)


@router.post(
"/job/{resource_id:str}",
response_model=models.Job,
Expand All @@ -31,13 +29,20 @@ async def submit_job(
job_spec: models.JobSpec,
request: Request,
user: User = Depends(router.current_user),
project_name: str | None = Depends(router.iri_header_project),
_forbid=Depends(forbidExtraQueryParams()),
):
"""
Submit a job on a compute resource

- **resource**: the name of the compute resource to use
- **job_request**: a PSIJ job spec as defined <a href="https://exaworks.org/psij-python/docs/v/0.9.11/.generated/tree.html#jobspec">here</a>
- **project/account resolution**:
The effective project/account for the submission must be supplied in exactly one place:
`job_spec.attributes.account` or the trusted `X-IRI-Facility-Project` request header.
If the forwarded header is present and valid, IRI treats its value as the effective facility-native project/account
for the downstream submission and related job metadata. If both sources are present, or neither is present,
the request is rejected with `400 Bad Request`.

This command will attempt to submit a job and return its id.
"""
Expand All @@ -63,6 +68,7 @@ async def update_job(
job_spec: models.JobSpec,
request: Request,
user: User = Depends(router.current_user),
project_name: str | None = Depends(router.iri_header_project),
_forbid=Depends(forbidExtraQueryParams()),
):
"""
Expand All @@ -71,6 +77,12 @@ async def update_job(

- **resource**: the name of the compute resource to use
- **job_request**: a PSIJ job spec as defined <a href="https://exaworks.org/psij-python/docs/v/0.9.11/.generated/tree.html#jobspec">here</a>
- **project/account resolution**:
The effective project/account for the update must be supplied in exactly one place:
`job_spec.attributes.account` or the trusted `X-IRI-Facility-Project` request header.
If the forwarded header is present and valid, IRI treats its value as the effective facility-native project/account
for downstream update handling and job metadata. If both sources are present, or neither is present,
the request is rejected with `400 Bad Request`.

"""
# look up the resource (todo: maybe ensure it's available)
Expand Down
20 changes: 18 additions & 2 deletions app/routers/compute/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,16 @@ class JobAttributes(IRIBaseModel):

duration: int|None = Field(default=None, description="Duration in seconds", ge=1, examples=[30, 60, 120])
queue_name: str|None = Field(default=None, min_length=1, description="Name of the queue or partition to submit the job to", example="debug")
account: str|None = Field(default=None, min_length=1, description="Account or project to charge for resource usage", example="proj123")
account: str|None = Field(
default=None,
min_length=1,
description=(
"Account or project to charge for resource usage. "
"For compute submission/update requests, specify this here only when the caller is not relying on a trusted forwarded "
"`X-IRI-Facility-Project` header. If that header is present and valid, this field must be omitted."
),
example="proj123",
)
reservation_id: str|None = Field(default=None, min_length=1, description="ID of a reservation to use for the job", example="resv-42")
custom_attributes: dict[str, str] = Field(default_factory=dict, description="Custom scheduler-specific attributes as key-value pairs", example={"constraint": "gpu"})

Expand Down Expand Up @@ -79,7 +88,14 @@ class JobSpec(IRIBaseModel):
stdout_path: str|None = Field(default=None, min_length=1, description="Path to file to write standard output", example="/home/user/output.txt")
stderr_path: str|None = Field(default=None, min_length=1, description="Path to file to write standard error", example="/home/user/error.txt")
resources: ResourceSpec|None = Field(default=None, description="Resource requirements for the job")
attributes: JobAttributes|None = Field(default=None, description="Additional job attributes such as duration, queue, and account")
attributes: JobAttributes|None = Field(
default=None,
description=(
"Additional job attributes such as duration, queue, and account. "
"For compute submission/update, the effective project/account must be supplied in exactly one place: "
"`attributes.account` or the trusted `X-IRI-Facility-Project` request header."
),
)
pre_launch: str|None = Field(default=None, min_length=1, description="Script or commands to run before launching the job", example="module load cuda")
post_launch: str|None = Field(default=None, min_length=1, description="Script or commands to run after the job completes", example="echo done")
launcher: str|None = Field(default=None, min_length=1, description="Job launcher to use (e.g., 'mpirun', 'srun')", example="srun")
Expand Down
27 changes: 26 additions & 1 deletion app/routers/iri_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
import logging
import importlib
import time
from typing import Any
import globus_sdk
from fastapi import Request, Depends, HTTPException, APIRouter
from fastapi import Body, Request, Depends, HTTPException, APIRouter
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials

from ..request_context import get_iri_facility_project
from ..types.user import User

bearer_scheme = HTTPBearer()
Expand Down Expand Up @@ -159,6 +161,29 @@ async def current_user(
raise HTTPException(status_code=404, detail="User not found")
return user

async def iri_header_project(self, request: Request, job_spec: dict[str, Any] | None = Body(default=None)) -> str | None:
    """Check that a compute request names its project/account in exactly one place.

    The two candidate sources are the forwarded facility-project value from the
    request context and ``attributes.account`` in the raw ``job_spec`` body.

    Returns:
        The forwarded facility project, or ``None`` when the account was
        supplied through the job spec instead.

    Raises:
        HTTPException: 400 when both sources are set, or when neither is.
    """
    forwarded_project = get_iri_facility_project()

    body_attributes = None if job_spec is None else job_spec.get("attributes")
    if body_attributes is not None and not isinstance(body_attributes, dict):
        # Leave malformed body handling to FastAPI/Pydantic validation.
        return forwarded_project

    body_account = body_attributes.get("account") if body_attributes is not None else None

    has_body_account = bool(body_account)
    has_forwarded_project = bool(forwarded_project)

    # Exactly one source must be present, so equal flags mean "both" or "neither".
    if has_body_account == has_forwarded_project:
        if has_body_account:
            raise HTTPException(
                status_code=400,
                detail="Specify project/account in exactly one place: job_spec.attributes.account or X-IRI-Facility-Project, not both.",
            )
        raise HTTPException(
            status_code=400,
            detail="Project/account must be specified in exactly one place: job_spec.attributes.account or X-IRI-Facility-Project.",
        )
    return forwarded_project


class AuthenticatedAdapter(ABC):
@abstractmethod
Expand Down
Loading
Loading