8 changes: 8 additions & 0 deletions openml/_api/__init__.py
@@ -0,0 +1,8 @@
from openml._api.runtime.core import APIContext


def set_api_version(version: str, *, strict: bool = False) -> None:
    api_context.set_version(version=version, strict=strict)


api_context = APIContext()
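For orientation, a minimal usage sketch of this entry point; nothing below is part of the diff, the "v2" value is illustrative, and APIContext.set_version itself lives in openml/_api/runtime/core.py, which this PR does not show:

import openml._api as api

# Module-level context created at import time.
print(api.api_context)

# Switch the shared context to another API version; strict is simply
# forwarded to APIContext.set_version, whose exact semantics are defined
# in runtime/core.py rather than in this diff.
api.set_api_version("v2", strict=True)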
62 changes: 62 additions & 0 deletions openml/_api/config.py
@@ -0,0 +1,62 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import Literal

DelayMethod = Literal["human", "robot"]


@dataclass
class APIConfig:
    server: str
    base_url: str
    key: str
    timeout: int = 10  # seconds


@dataclass
class APISettings:
    v1: APIConfig
    v2: APIConfig


@dataclass
class ConnectionConfig:
    retries: int = 3
    delay_method: DelayMethod = "human"
    delay_time: int = 1  # seconds

    def __post_init__(self) -> None:
        if self.delay_method not in ("human", "robot"):
            raise ValueError(f"delay_method must be 'human' or 'robot', got {self.delay_method}")


@dataclass
class CacheConfig:
    dir: str = "~/.openml/cache"
    ttl: int = 60 * 60 * 24 * 7  # one week


@dataclass
class Settings:
    api: APISettings
    connection: ConnectionConfig
    cache: CacheConfig


settings = Settings(
    api=APISettings(
        v1=APIConfig(
            server="https://www.openml.org/",
            base_url="api/v1/xml/",
            key="...",
        ),
        v2=APIConfig(
            server="http://127.0.0.1:8001/",
            base_url="",
            key="...",
        ),
    ),
    connection=ConnectionConfig(),
    cache=CacheConfig(),
)
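As a reading aid, a small sketch of how the nested settings object above could be consumed elsewhere in the package; the adjusted values are purely illustrative and are not part of the diff:

from openml._api.config import settings

# Each API version carries its own server, base_url, key and timeout.
v1 = settings.api.v1
print(v1.server, v1.base_url, v1.timeout)  # https://www.openml.org/ api/v1/xml/ 10

# Connection and cache behaviour are plain dataclass fields, so they can
# be adjusted in place at runtime (illustrative values only).
settings.connection.retries = 5
settings.cache.ttl = 60 * 60  # one hour instead of the one-week default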
3 changes: 3 additions & 0 deletions openml/_api/http/__init__.py
@@ -0,0 +1,3 @@
from openml._api.http.client import HTTPClient

__all__ = ["HTTPClient"]
151 changes: 151 additions & 0 deletions openml/_api/http/client.py
@@ -0,0 +1,151 @@
from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING, Any
from urllib.parse import urlencode, urljoin, urlparse

import requests
from requests import Response

from openml.__version__ import __version__
from openml._api.config import settings

if TYPE_CHECKING:
    from openml._api.config import APIConfig


class CacheMixin:
    @property
    def dir(self) -> str:
        return settings.cache.dir

    @property
    def ttl(self) -> int:
        return settings.cache.ttl

    def _get_cache_dir(self, url: str, params: dict[str, Any]) -> Path:
        parsed_url = urlparse(url)
        netloc_parts = parsed_url.netloc.split(".")[::-1]  # reverse domain
        path_parts = parsed_url.path.strip("/").split("/")

        # remove api_key and serialize params if any
        filtered_params = {k: v for k, v in params.items() if k != "api_key"}
        params_part = [urlencode(filtered_params)] if filtered_params else []

        return Path(self.dir).joinpath(*netloc_parts, *path_parts, *params_part)

    def _get_cache_response(self, cache_dir: Path) -> Response:  # noqa: ARG002
        return Response()

    def _set_cache_response(self, cache_dir: Path, response: Response) -> None:  # noqa: ARG002
        return None


class HTTPClient(CacheMixin):
    def __init__(self, config: APIConfig) -> None:
        self.config = config
        self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"}

    @property
    def server(self) -> str:
        return self.config.server

    @property
    def base_url(self) -> str:
        return self.config.base_url

    @property
    def key(self) -> str:
        return self.config.key

    @property
    def timeout(self) -> int:
        return self.config.timeout

    def request(
        self,
        method: str,
        path: str,
        *,
        use_cache: bool = False,
        use_api_key: bool = False,
        **request_kwargs: Any,
    ) -> Response:
        url = urljoin(self.server, urljoin(self.base_url, path))

        params = request_kwargs.pop("params", {})
        params = params.copy()
        if use_api_key:
            params["api_key"] = self.key

        headers = request_kwargs.pop("headers", {})
        headers = headers.copy()
        headers.update(self.headers)

        timeout = request_kwargs.pop("timeout", self.timeout)
        cache_dir = self._get_cache_dir(url, params)

        if use_cache:
            try:
                return self._get_cache_response(cache_dir)
            # TODO: handle ttl expired error
            except Exception:
                raise

        response = requests.request(
            method=method,
            url=url,
            params=params,
            headers=headers,
            timeout=timeout,
            **request_kwargs,
        )

        if use_cache:
            self._set_cache_response(cache_dir, response)

        return response

    def get(
        self,
        path: str,
        *,
        use_cache: bool = False,
        use_api_key: bool = False,
        **request_kwargs: Any,
    ) -> Response:
        # TODO: remove override when cache is implemented
        use_cache = False
        return self.request(
            method="GET",
            path=path,
            use_cache=use_cache,
            use_api_key=use_api_key,
            **request_kwargs,
        )

    def post(
        self,
        path: str,
        **request_kwargs: Any,
    ) -> Response:
        return self.request(
            method="POST",
            path=path,
            use_cache=False,
            use_api_key=True,
            **request_kwargs,
        )

    def delete(
        self,
        path: str,
        **request_kwargs: Any,
    ) -> Response:
        return self.request(
            method="DELETE",
            path=path,
            use_cache=False,
            use_api_key=True,
            **request_kwargs,
        )
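To make the request flow concrete, a hedged usage sketch of the client above; the data/61 path is only an illustrative v1-style endpoint and is not defined anywhere in this PR:

from openml._api.config import settings
from openml._api.http import HTTPClient

client = HTTPClient(settings.api.v1)

# urljoin(server, urljoin(base_url, path)) resolves to
# https://www.openml.org/api/v1/xml/data/61 with the v1 config above.
response = client.get("data/61", use_api_key=True)
response.raise_for_status()
print(response.headers.get("content-type"))

# post() and delete() always attach the API key and never read the
# (currently stubbed) cache.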
Empty file added openml/_api/http/utils.py
Empty file.
4 changes: 4 additions & 0 deletions openml/_api/resources/__init__.py
@@ -0,0 +1,4 @@
from openml._api.resources.datasets import DatasetsV1, DatasetsV2
from openml._api.resources.tasks import TasksV1, TasksV2

__all__ = ["DatasetsV1", "DatasetsV2", "TasksV1", "TasksV2"]
100 changes: 100 additions & 0 deletions openml/_api/resources/base.py
@@ -0,0 +1,100 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from pathlib import Path
from typing import TYPE_CHECKING, Any
from typing_extensions import Literal

if TYPE_CHECKING:
    import pandas as pd
    from requests import Response

    from openml._api.http import HTTPClient
    from openml.datasets.dataset import OpenMLDataFeature, OpenMLDataset
    from openml.tasks.task import OpenMLTask


class ResourceAPI:
    def __init__(self, http: HTTPClient):
        self._http = http


class DatasetsAPI(ResourceAPI, ABC):
    @abstractmethod
    def get(
        self,
        dataset_id: int | str,
        *,
        return_response: bool = False,
    ) -> OpenMLDataset | tuple[OpenMLDataset, Response]: ...

    @abstractmethod
    def list(
        self,
        limit: int,
        offset: int,
        *,
        data_id: list[int] | None = None,  # type: ignore
        **kwargs: Any,
    ) -> pd.DataFrame: ...
Comment on lines +32 to +39

Contributor:
Can we not have the same signature for all three methods (DatasetsAPI.list, DatasetsV1.list, DatasetsV2.list)? Does it raise pre-commit failures, since a few of the parameters might not be used?

Contributor Author:
Oh, that v2 signature was experimental; I don't know how the pre-commit hooks did not catch that. Will make them the same.

    @abstractmethod
    def delete(self, dataset_id: int) -> bool: ...

    @abstractmethod
    def edit(  # noqa: PLR0913
        self,
        data_id: int,
        description: str | None = None,
        creator: str | None = None,
        contributor: str | None = None,
        collection_date: str | None = None,
        language: str | None = None,
        default_target_attribute: str | None = None,
        ignore_attribute: str | list[str] | None = None,  # type: ignore
        citation: str | None = None,
        row_id_attribute: str | None = None,
        original_data_url: str | None = None,
        paper_url: str | None = None,
    ) -> int: ...

    @abstractmethod
    def fork(self, dataset_id: int) -> int: ...

    @abstractmethod
    def status_update(self, dataset_id: int, status: Literal["active", "deactivated"]) -> None: ...

    @abstractmethod
    def list_qualities(self) -> list[str]: ...  # type: ignore

    @abstractmethod
    def feature_add_ontology(self, dataset_id: int, index: int, ontology: str) -> bool: ...

    @abstractmethod
    def feature_remove_ontology(self, dataset_id: int, index: int, ontology: str) -> bool: ...

    @abstractmethod
    def get_features(self, dataset_id: int) -> dict[int, OpenMLDataFeature]: ...

    @abstractmethod
    def get_qualities(self, dataset_id: int) -> dict[str, float] | None: ...

    @abstractmethod
    def parse_features_file(
        self, features_file: Path, features_pickle_file: Path
    ) -> dict[int, OpenMLDataFeature]: ...

    @abstractmethod
    def parse_qualities_file(
        self, qualities_file: Path, qualities_pickle_file: Path
    ) -> dict[str, float] | None: ...


class TasksAPI(ResourceAPI, ABC):
    @abstractmethod
    def get(
        self,
        task_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLTask | tuple[OpenMLTask, Response]: ...