diff --git a/README.md b/README.md
index 9e34777..fea133b 100644
--- a/README.md
+++ b/README.md
@@ -225,25 +225,30 @@ msg = client.messages.create(
Long-running processes: auto-refreshing the Entra ID token
-The plain `anthropic.Anthropic` client only accepts `auth_token: str | None`, so a captured token will start failing with `401 Unauthorized` after ~1 hour.
+A captured Entra ID token starts failing with `401 Unauthorized` after ~1 hour, so services, daemons, long batch jobs, and notebooks left open need a refresh path.
-For services, daemons, long batch jobs, or notebooks left open, use [src/hello_claude_token_refresh.py](./src/hello_claude_token_refresh.py). It defines a tiny `AnthropicIdentity(Anthropic)` subclass that overrides the `auth_token` property to call `azure.identity.get_bearer_token_provider(...)` per request, giving free per-request token refresh:
+Anthropic SDK v0.98+ added a `credentials=` constructor parameter that takes an `AccessTokenProvider` callable. The SDK wraps it in a `TokenCache` that calls the provider lazily, caches the token until expiry, and on a 401 invalidates the cache and retries the request once with a fresh token — exactly what we want.
+
+[src/hello_claude_token_refresh.py](./src/hello_claude_token_refresh.py) wires `azure.identity.DefaultAzureCredential` into that hook:
```python
-from azure.identity import DefaultAzureCredential, get_bearer_token_provider
-# AnthropicIdentity is defined in hello_claude_token_refresh.py
-from hello_claude_token_refresh import AnthropicIdentity
+from anthropic import Anthropic
+from anthropic.lib.credentials import AccessToken
+from azure.identity import DefaultAzureCredential
-token_provider = get_bearer_token_provider(
- DefaultAzureCredential(), "https://ai.azure.com/.default"
-)
-client = AnthropicIdentity(
- azure_ad_token_provider=token_provider,
+credential = DefaultAzureCredential()
+
+def entra_token_provider(*, force_refresh: bool = False) -> AccessToken:
+ token = credential.get_token("https://ai.azure.com/.default")
+ return AccessToken(token=token.token, expires_at=token.expires_on)
+
+client = Anthropic(
+ credentials=entra_token_provider,
base_url="https://<resource-name>.services.ai.azure.com/anthropic",
)
```
-If the Anthropic SDK ever accepts a callable for `auth_token`, this shim becomes unnecessary.
+Requires `anthropic>=0.109.1` (pinned in [requirements.txt](./requirements.txt)).
@@ -733,7 +738,7 @@ The Terraform variant uses `azapi_resource` for both the Foundry account and the
| `Project can only be created under AIServices Kind account with allowProjectManagement set to true` | Account property missing. Both variants here set it; check you didn't downgrade the API version. |
| `404 Not Found` on inference | Base URL must end in `/anthropic` — `https://<resource-name>.services.ai.azure.com/anthropic`. |
| `401 Unauthorized` | Token scope must be `https://ai.azure.com/.default`. Re-run `az login`. |
-| `401 Unauthorized` after ~1 hour of running | The Entra ID token captured at startup has expired. The plain `Anthropic` client doesn't auto-refresh — see the [long-running token refresh shim](#advanced-long-running-processes-auto-refreshing-the-entra-id-token) for [src/hello_claude_token_refresh.py](./src/hello_claude_token_refresh.py), which uses an `AnthropicIdentity` shim to refresh per request. |
+| `401 Unauthorized` after ~1 hour of running | The Entra ID token captured at startup has expired. Pass `credentials=` to `Anthropic(...)` instead of `auth_token=` — see [long-running processes](#advanced-long-running-processes-auto-refreshing-the-entra-id-token) and [src/hello_claude_token_refresh.py](./src/hello_claude_token_refresh.py). The SDK's `TokenCache` refreshes the token on 401 and retries once. |
| `403 Forbidden` | Missing a data-plane role on the Foundry account. Grant `Cognitive Services User`, `Foundry User` (formerly `Azure AI User`), or `Azure AI Developer` (see [Required permissions](#required-permissions)). |
| `Region not available` | Deploy to `eastus2` or `swedencentral` (or `westus2` for opus-only). |
| Subscription can't deploy Claude | Confirm subscription eligibility per the [official docs](https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/use-foundry-models-claude#prerequisites). The [preprovision preflight](#preprovision-preflight-marketplace-catalog--quota) warns about this before `azd up` calls the RP. |
diff --git a/requirements.txt b/requirements.txt
index 21089ff..76e482e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
-anthropic>=0.104.1
+anthropic>=0.109.1
azure-identity>=1.19.0
python-dotenv>=1.2.2
diff --git a/skills/claude-on-foundry/SKILL.md b/skills/claude-on-foundry/SKILL.md
index ea00caa..dbecefd 100644
--- a/skills/claude-on-foundry/SKILL.md
+++ b/skills/claude-on-foundry/SKILL.md
@@ -128,7 +128,7 @@ Match the customer's exact error string to a row. Verify the diagnostic command
|---|---|---|---|
| `404 Not Found` on first SDK call | `base_url` is missing the `/anthropic` suffix. | Print the `base_url` the script is using. | Append `/anthropic` so it's `https://<resource-name>.services.ai.azure.com/anthropic`. |
| `401 Unauthorized` on first call | Token scope wrong, or no `az login`. | `az account get-access-token --scope https://ai.azure.com/.default --query expiresOn` | `az login` (add `--tenant <tenant-id>` if Foundry is in a different tenant). Scope must be `https://ai.azure.com/.default`. |
-| `401 Unauthorized` after ~1 hour of running | Captured token expired; plain `Anthropic` client doesn't auto-refresh. | Check how long the process has been alive. | Switch to [`src/hello_claude_token_refresh.py`](../../src/hello_claude_token_refresh.py) which uses `AnthropicIdentity` + `get_bearer_token_provider` for per-request refresh. |
+| `401 Unauthorized` after ~1 hour of running | Captured static `auth_token` expired; the SDK does not refresh it. | Check how long the process has been alive. | Switch to [`src/hello_claude_token_refresh.py`](../../src/hello_claude_token_refresh.py) which uses `Anthropic(credentials=...)` — the SDK's `TokenCache` refreshes on 401 and retries once. Requires `anthropic>=0.109.1`. |
| `401 PermissionDenied: Principal does not have access to API/Operation` — intermittently, passes seconds later | Data-plane RBAC propagation lag right after a role grant. | `az role assignment list --assignee <principal-id> --scope <foundry-account-resource-id> -o table` | Wait 1-3 minutes and retry. Do NOT suggest disabling retries. |
| `403 Forbidden` consistently | Caller has no data-plane role on the Foundry account. | Same `az role assignment list` query. | Grant `Cognitive Services User` (minimum), `Foundry User`, or `Azure AI Developer`. See the one-liner in [README → Granting data-plane roles after `azd up`](../../README.md#granting-data-plane-roles-after-azd-up). |
| `claude -p` says: `The model claude--... is not available on your foundry deployment` | User-global `~/.claude/settings.json` pins a family this workspace didn't deploy, overriding the workspace pin. | `cat .claude/settings.json` and `cat ~/.claude/settings.json`. | Re-run `pwsh -File scripts/configure-claude-code.ps1`, OR pass `--model ` explicitly, OR (with user OK) edit the user-global file to remove the `"model"` line. |
@@ -172,7 +172,7 @@ claude # interactive REPL; try /status and /mode
| **Switch variants (Bicep ↔ Terraform)** | They produce equivalent infra but with different `azd` env state. Create a new env in the other folder: `cd infra-terraform && azd env new && ...`. |
| **Refresh Claude Code wiring** | `pwsh -File scripts/configure-claude-code.ps1` (or the `.sh` variant). Idempotent — runs without re-deploying. |
| **Wire up the Claude Code VS Code extension** | `azd env set CLAUDE_WRITE_VSCODE_SETTINGS 1` (then re-run `azd provision` or the configure script). Opt-in because the activator + `.claude/settings.json` are enough for the CLI and SDK; only the [Anthropic Claude Code VS Code extension](https://marketplace.visualstudio.com/items?itemName=anthropic.claude-code) needs `claudeCode.*` keys in workspace settings. |
-| **Convert to long-running auth** | Replace `Anthropic(auth_token=...)` with `AnthropicIdentity(azure_ad_token_provider=...)` from [`src/hello_claude_token_refresh.py`](../../src/hello_claude_token_refresh.py). |
+| **Convert to long-running auth** | Replace `Anthropic(auth_token=token)` with `Anthropic(credentials=provider)` where `provider` is a callable that accepts a keyword-only `force_refresh: bool = False` argument and returns `anthropic.lib.credentials.AccessToken(token=..., expires_at=...)`. See [`src/hello_claude_token_refresh.py`](../../src/hello_claude_token_refresh.py). Requires `anthropic>=0.109.1`. |
---
diff --git a/src/hello_claude_token_refresh.py b/src/hello_claude_token_refresh.py
index 548e5c2..4d1e8de 100644
--- a/src/hello_claude_token_refresh.py
+++ b/src/hello_claude_token_refresh.py
@@ -7,51 +7,49 @@
## Why this exists
-The plain `anthropic.Anthropic` client only accepts `auth_token: str | None`,
-so a captured Entra ID token will start failing with `401 Unauthorized` after
+The plain `anthropic.Anthropic` client's `auth_token` is a static `str`, so a
+captured Entra ID token would start failing with `401 Unauthorized` after
roughly an hour.
-The Anthropic SDK reads `self.auth_token` via a property on every request, so
-we subclass `Anthropic` and turn it into a property that calls the Entra
-token provider, giving free per-request token refresh.
+Anthropic SDK v0.98+ added a public `credentials=` constructor argument that
+takes an `AccessTokenProvider` callable. The SDK wraps it in a `TokenCache`
+that calls the provider lazily, caches the result until expiry, and on a 401
+invalidates the cache and retries the request once with a fresh token. That
+matches exactly what we need to bridge `azure.identity` into the Anthropic
+client without subclassing or shimming `auth_token`.
"""
from __future__ import annotations
import os
import sys
-from typing import Callable
from anthropic import Anthropic
-from azure.identity import DefaultAzureCredential, get_bearer_token_provider
+from anthropic.lib.credentials import AccessToken
+from azure.identity import DefaultAzureCredential
from dotenv import load_dotenv
-class AnthropicIdentity(Anthropic):
- """Plain Anthropic client that pulls a fresh Entra ID token per request."""
+def _entra_credentials_provider(scope: str = "https://ai.azure.com/.default"):
+ """Build an Anthropic `AccessTokenProvider` backed by `DefaultAzureCredential`.
- def __init__(
- self,
- *,
- azure_ad_token_provider: Callable[[], str],
- base_url: str,
- **kwargs,
- ) -> None:
- self._azure_ad_token_provider = azure_ad_token_provider
- # `auth_token` must be non-None so the parent's auth-header builder
- # emits the `Authorization` header. The actual token comes from our
- # property override below.
- super().__init__(auth_token="placeholder", base_url=base_url, **kwargs)
+ The provider is called by the SDK's `TokenCache` only when there is no
+ cached token, when the cached token has expired, or when a 401 forced an
+ invalidation. `azure.identity` itself also caches and refreshes tokens
+ internally, so this stays cheap on the hot path.
+ """
+ credential = DefaultAzureCredential()
- @property
- def auth_token(self) -> str: # type: ignore[override]
- return self._azure_ad_token_provider()
+ def _provider(*, force_refresh: bool = False) -> AccessToken:
+ # `force_refresh` is set by TokenCache.invalidate() after a 401. It is
+ # deliberately ignored: DefaultAzureCredential has no force-refresh knob,
+ # so we just call get_token() again. That mints a new token only if the
+ # cached one is near expiry (the common 401 cause); else it may be reused.
+ token = credential.get_token(scope)
+ # `expires_on` is unix seconds — the format Anthropic's TokenCache expects.
+ return AccessToken(token=token.token, expires_at=token.expires_on)
- @auth_token.setter
- def auth_token(self, _value: str | None) -> None:
- # Silently ignore the parent's `self.auth_token = ...` assignment;
- # the provider is the source of truth.
- pass
+ return _provider
def main() -> int:
@@ -64,12 +62,8 @@ def main() -> int:
print("Set CLAUDE_BASE_URL and CLAUDE_DEPLOYMENT_NAME.", file=sys.stderr)
return 1
- token_provider = get_bearer_token_provider(
- DefaultAzureCredential(), "https://ai.azure.com/.default"
- )
-
- client = AnthropicIdentity(
- azure_ad_token_provider=token_provider,
+ client = Anthropic(
+ credentials=_entra_credentials_provider(),
base_url=base_url,
)