# handles HTTP requests, retries, and proxy configuration
class HTTPClient:
    """Thin wrapper around ``requests`` bound to a single base URL.

    Args:
        base_url: Prefix prepended verbatim to every request path.
        timeout: Optional value forwarded to ``requests`` as ``timeout``.
            The default ``None`` keeps the previous behaviour (no client-side
            time limit); pass a number of seconds to bound each request.
    """

    def __init__(self, base_url: str, *, timeout=None):
        self.base_url = base_url
        self.timeout = timeout

    def _url(self, path):
        """Return the request URL for *path* (plain concatenation)."""
        return f"{self.base_url}{path}"

    def get(self, path, params=None):
        """Send a GET request to ``base_url + path`` and return the response."""
        return requests.get(
            self._url(path),
            params=params,
            timeout=self.timeout,
        )

    def post(self, path, params=None, files=None):
        """Send a POST request to ``base_url + path`` and return the response."""
        return requests.post(
            self._url(path),
            params=params,
            files=files,
            timeout=self.timeout,
        )
# abstract blueprints defining SDK-exposed API endpoints and methods
class DatasetsAPI(ABC):
    """Version-independent interface for dataset access."""

    @abstractmethod
    def get(self, id: int) -> dict:
        """Return the dataset identified by *id* as a dictionary."""
class TasksAPI(ABC):
    """Version-independent interface for task access."""

    @abstractmethod
    def get(self, id: int) -> dict:
        """Return the task identified by *id* as a dictionary."""

    @abstractmethod
    def list(self, id: int) -> dict:
        """Return a task listing as a dictionary.

        NOTE(review): the signature takes an ``id`` like ``get`` does --
        confirm whether a listing call is really meant to require one.
        """
# version-specific implementations built on top of the API blueprints
class DatasetsV1(DatasetsAPI):
    """Dataset access implemented against the v1 JSON endpoints."""

    def __init__(self, http: HTTPClient):
        self._http = http

    def get(self, id: int) -> dict:
        """Fetch dataset *id* and return its ``id``, ``name`` and ``version``.

        Raises:
            KeyError: If the response lacks the ``data_set_description``
                wrapper or one of the three fields.
        """
        r = self._http.get(f"/api/v1/json/data/{id}")
        # The payload is wrapped exactly once in "data_set_description".
        # Unwrap it here and read the fields directly; indexing the wrapper
        # key a second time (as before) fails on the already-unwrapped dict.
        d = r.json()["data_set_description"]
        return {
            "id": d["id"],
            "name": d["name"],
            "version": d["version"],
        }
class DatasetsV2(DatasetsAPI):
    """Dataset access implemented against the v2 endpoints."""

    def __init__(self, http: HTTPClient):
        self._http = http

    def get(self, id: int) -> dict:
        """Fetch dataset *id* and return its ``id``, ``name`` and ``version``."""
        payload = self._http.get(f"/datasets/{id}").json()
        # Keep only the fields exposed by the SDK, in a fixed order.
        return {field: payload[field] for field in ("id", "name", "version")}
class TasksV1(TasksAPI):
    """Stub for the v1 task endpoints; no endpoint is mapped yet.

    The constructor and both abstract methods are defined so the class can
    be instantiated with an HTTP client; every call raises
    ``NotImplementedError`` until a real implementation is added.
    """

    def __init__(self, http: HTTPClient):
        self._http = http

    def get(self, id: int) -> dict:
        """Not implemented yet; always raises ``NotImplementedError``."""
        raise NotImplementedError("TasksV1.get is not implemented")

    def list(self, id: int) -> dict:
        """Not implemented yet; always raises ``NotImplementedError``."""
        raise NotImplementedError("TasksV1.list is not implemented")
class TasksV2(TasksAPI):
    """Stub for the v2 task endpoints; no endpoint is mapped yet.

    The constructor and both abstract methods are defined so the class can
    be instantiated with an HTTP client; every call raises
    ``NotImplementedError`` until a real implementation is added.
    """

    def __init__(self, http: HTTPClient):
        self._http = http

    def get(self, id: int) -> dict:
        """Not implemented yet; always raises ``NotImplementedError``."""
        raise NotImplementedError("TasksV2.get is not implemented")

    def list(self, id: int) -> dict:
        """Not implemented yet; always raises ``NotImplementedError``."""
        raise NotImplementedError("TasksV2.list is not implemented")
# proxy that falls back to v1 when v2 endpoints are missing or unimplemented
class FallbackProxy:
    """Forward attribute access to ``primary``, falling back to ``fallback``.

    A call is retried on ``fallback`` when the primary method raises
    ``NotImplementedError``; an attribute that is missing on ``primary`` is
    looked up on ``fallback`` instead. With ``strict=True`` no fallback
    happens and the original error propagates.

    Args:
        primary: Object tried first.
        fallback: Object used when ``primary`` cannot serve the request.
        strict: Disable the fallback. Defaults to ``False`` so the proxy can
            be built with just the two targets.
    """

    def __init__(self, primary, fallback, *, strict: bool = False):
        self._primary = primary
        self._fallback = fallback
        self._strict = strict

    def __getattr__(self, name):
        # __getattr__ only runs when normal lookup failed. If one of the
        # proxy's own slots is missing (e.g. instance created without
        # __init__), reading self._primary below would re-enter this method
        # forever, so fail fast instead.
        if name in ("_primary", "_fallback", "_strict"):
            raise AttributeError(name)

        try:
            primary_attr = getattr(self._primary, name)
        except AttributeError:
            # Method missing on the primary: use the fallback unless strict.
            if self._strict:
                raise
            return getattr(self._fallback, name)

        if not callable(primary_attr):
            return primary_attr

        def wrapper(*args, **kwargs):
            try:
                return primary_attr(*args, **kwargs)
            except NotImplementedError:
                if self._strict:
                    raise
                return getattr(self._fallback, name)(*args, **kwargs)

        return wrapper
# core backend holding API bindings, exposed internally and to the SDK
class APIBackend:
    """Bundle of resource bindings, one attribute per resource."""

    def __init__(self, *, datasets, tasks):
        # Both bindings are stored as given; no validation is performed.
        self.datasets, self.tasks = datasets, tasks
def build_backend(version: str, strict: bool) -> APIBackend:
    """Build the backend for *version*.

    Args:
        version: ``"v1"`` selects the v1 backend; any other value selects v2.
        strict: For v2 only. When true the plain v2 backend is returned;
            otherwise every v2 resource is wrapped so that it falls back
            to its v1 counterpart.

    Returns:
        The configured ``APIBackend``.
    """
    v1_http = HTTPClient("https://www.openml.org")
    v1 = APIBackend(
        datasets=DatasetsV1(v1_http),
        tasks=TasksV1(v1_http),
    )
    if version == "v1":
        return v1

    v2_http = HTTPClient("http://127.0.0.1:8001")
    v2 = APIBackend(
        datasets=DatasetsV2(v2_http),
        tasks=TasksV2(v2_http),
    )
    if strict:
        return v2

    # FallbackProxy declares ``strict`` as a keyword-only argument, so it
    # must be passed explicitly; it is always False on this path.
    return APIBackend(
        datasets=FallbackProxy(v2.datasets, v1.datasets, strict=strict),
        tasks=FallbackProxy(v2.tasks, v1.tasks, strict=strict),
    )
class APIContext:
    """Holder for the currently active backend; starts on non-strict v1."""

    def __init__(self):
        self.set_version("v1", strict=False)

    @property
    def backend(self):
        """The backend built by the most recent version selection."""
        return self._backend

    def set_version(self, version: str, strict: bool = False):
        """Rebuild the active backend for *version* and *strict*."""
        rebuilt = build_backend(version, strict)
        self._backend = rebuilt
# SDK-facing entry points and version switching helpers
# Module-level context instance; set_api_version() below operates on it.
api_context = APIContext()
def set_api_version(version: str, strict=False):
    """Switch the module-level ``api_context`` to *version*.

    *strict* is handed through unchanged to ``APIContext.set_version``.
    """
    api_context.set_version(version, strict)
# Usage illustration: select v2 without fallback, then call resources
# through the shared context.
openml.set_api_version("v2", strict=True)
# APIBackend exposes the dataset binding as ``datasets`` (there is no
# ``data`` attribute).
api_context.backend.datasets.get(31)
# NOTE(review): TasksAPI.list is declared with an ``id`` parameter --
# confirm whether this call should pass one.
api_context.backend.tasks.list()
This issue tracks the migration of the openml-python SDK from the legacy OpenML API v1 (PHP-based) in openml/OpenML to the newer API v2 (Python-based) in openml/server-api, along with a structural refactor of the codebase to support both APIs during the transition. The SDK currently relies on the v1 API, which should be progressively replaced by v2 while keeping backward compatibility and a fallback to v1 where v2 endpoints are incomplete.
Goals
Proposed design (high level)
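- A shared `HTTPClient` responsible for HTTP requests, retries, and proxy configuration
- Abstract resource interfaces (`DatasetsAPI`, `TasksAPI`) defining SDK-facing methods
- Version-specific implementations (`*V1`, `*V2`) mapping those methods to concrete endpoints
- A fallback from v2 to v1 that can be disabled (`strict=True`)

A draft implementation sketch is included below to illustrate the architecture (not final API).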
Work Items / Refactor scope
Base Scaffolding
Foundation: Establish the initial folder and file structure along with minimal base implementations for core components (HTTP client, backend, resource interfaces, versioned stubs). This provides a stable scaffold that subsequent refactor and migration work can build on.
HTTPClient features: Extend the shared HTTP client to provide consistent, reusable infrastructure concerns across all API versions and resources.
Fallback policy: Define and implement the fallback behavior that transparently redirects calls from v2 to v1 when endpoints are missing or unimplemented, with strict mode disabling fallback and surfacing errors.
Resource implementations
Implement resource classes that define SDK-facing methods and map them to concrete API endpoints. Each resource should have a clear abstract interface and version-specific implementations (`*V1`, `*V2`).
- TaskTypes (not used in the SDK)
- Users (not used in the SDK, though this is questionable, since it was spotted in evaluations)