Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 163 additions & 3 deletions langfuse/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,48 @@ class OpenAiDefinition:
type="chat",
sync=True,
),
OpenAiDefinition(
module="openai.resources.images",
object="Images",
method="generate",
type="image",
sync=True,
),
OpenAiDefinition(
module="openai.resources.images",
object="AsyncImages",
method="generate",
type="image",
sync=False,
),
OpenAiDefinition(
module="openai.resources.images",
object="Images",
method="edit",
type="image",
sync=True,
),
OpenAiDefinition(
module="openai.resources.images",
object="AsyncImages",
method="edit",
type="image",
sync=False,
),
OpenAiDefinition(
module="openai.resources.images",
object="Images",
method="create_variation",
type="image",
sync=True,
),
OpenAiDefinition(
module="openai.resources.images",
object="AsyncImages",
method="create_variation",
type="image",
sync=False,
),
OpenAiDefinition(
module="openai.resources.completions",
object="Completions",
Expand Down Expand Up @@ -354,9 +396,12 @@ def _extract_chat_response(kwargs: Any) -> Any:


def _get_langfuse_data_from_kwargs(resource: OpenAiDefinition, kwargs: Any) -> Any:
default_name = (
"OpenAI-embedding" if resource.type == "embedding" else "OpenAI-generation"
)
if resource.type == "embedding":
default_name = "OpenAI-embedding"
elif resource.type == "image":
default_name = "OpenAI-image"
else:
default_name = "OpenAI-generation"
name = kwargs.get("name", default_name)

if name is None:
Expand Down Expand Up @@ -417,6 +462,12 @@ def _get_langfuse_data_from_kwargs(resource: OpenAiDefinition, kwargs: Any) -> A
prompt = _extract_chat_prompt(kwargs)
elif resource.type == "embedding":
prompt = kwargs.get("input", None)
elif resource.type == "image":
# generate() and edit() accept prompt, but create_variation() does not
if resource.method in ["generate", "edit"]:
prompt = kwargs.get("prompt", None)
else:
prompt = None # create_variation uses image input, not text prompt

parsed_temperature = (
kwargs.get("temperature", 1)
Expand Down Expand Up @@ -479,6 +530,44 @@ def _get_langfuse_data_from_kwargs(resource: OpenAiDefinition, kwargs: Any) -> A
modelParameters["dimensions"] = parsed_dimensions
if parsed_encoding_format != "float":
modelParameters["encoding_format"] = parsed_encoding_format
elif resource.type == "image":
# Image generation parameters
modelParameters = {}

parsed_size = (
kwargs.get("size", None)
if not isinstance(kwargs.get("size", None), NotGiven)
else None
)
if parsed_size is not None:
modelParameters["size"] = parsed_size

parsed_quality = (
kwargs.get("quality", None)
if not isinstance(kwargs.get("quality", None), NotGiven)
else None
)
if parsed_quality is not None:
modelParameters["quality"] = parsed_quality

parsed_style = (
kwargs.get("style", None)
if not isinstance(kwargs.get("style", None), NotGiven)
else None
)
if parsed_style is not None:
modelParameters["style"] = parsed_style

parsed_response_format = (
kwargs.get("response_format", None)
if not isinstance(kwargs.get("response_format", None), NotGiven)
else None
)
if parsed_response_format is not None:
modelParameters["response_format"] = parsed_response_format

if parsed_n is not None and isinstance(parsed_n, int) and parsed_n > 1:
modelParameters["n"] = parsed_n
else:
modelParameters = {
"temperature": parsed_temperature,
Expand Down Expand Up @@ -791,6 +880,33 @@ def _get_langfuse_data_from_default_response(
"count": len(data),
}

elif resource.type == "image":
data = response.get("data", [])
completion = []
for item in data:
image_data = item.__dict__ if hasattr(item, "__dict__") else item
image_result = {}

# Handle URL response
if image_data.get("url"):
image_result["url"] = image_data["url"]

# Handle base64 response
if image_data.get("b64_json"):
# Wrap in LangfuseMedia for proper handling
base64_data_uri = f"data:image/png;base64,{image_data['b64_json']}"
image_result["image"] = LangfuseMedia(base64_data_uri=base64_data_uri)

# Include revised_prompt if present (DALL-E 3)
if image_data.get("revised_prompt"):
image_result["revised_prompt"] = image_data["revised_prompt"]

completion.append(image_result)

# If only one image, unwrap from list
if len(completion) == 1:
completion = completion[0]

usage = _parse_usage(response.get("usage", None))

return (model, completion, usage)
Expand Down Expand Up @@ -842,6 +958,28 @@ def _wrap(
try:
openai_response = wrapped(**arg_extractor.get_openai_args())

# Handle image generation (non-streaming)
if open_ai_resource.type == "image":
model, completion, usage = _get_langfuse_data_from_default_response(
open_ai_resource,
(openai_response and openai_response.__dict__)
if _is_openai_v1()
else openai_response,
)

# Calculate image count for usage tracking
image_count = 1
if isinstance(completion, list):
image_count = len(completion)

generation.update(
model=model,
output=completion,
usage_details={"output": image_count, "total": image_count, "unit": "IMAGES"},
).end()

return openai_response

if _is_streaming_response(openai_response):
return LangfuseResponseGeneratorSync(
resource=open_ai_resource,
Expand Down Expand Up @@ -913,6 +1051,28 @@ async def _wrap_async(
try:
openai_response = await wrapped(**arg_extractor.get_openai_args())

# Handle image generation (non-streaming)
if open_ai_resource.type == "image":
model, completion, usage = _get_langfuse_data_from_default_response(
open_ai_resource,
(openai_response and openai_response.__dict__)
if _is_openai_v1()
else openai_response,
)

# Calculate image count for usage tracking
image_count = 1
if isinstance(completion, list):
image_count = len(completion)

generation.update(
model=model,
output=completion,
usage_details={"output": image_count, "total": image_count, "unit": "IMAGES"},
).end()

return openai_response

if _is_streaming_response(openai_response):
return LangfuseResponseGeneratorAsync(
resource=open_ai_resource,
Expand Down
100 changes: 100 additions & 0 deletions tests/test_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -1594,3 +1594,103 @@ async def test_async_openai_embeddings(openai):
assert embedding_data.metadata["async"] is True
assert embedding_data.usage.input is not None
assert embedding_data.usage.total is not None


def test_openai_image_generation(openai):
    """Test that image generation calls are tracked in Langfuse.

    Calls images.generate() through the Langfuse-wrapped client, then
    queries the Langfuse API to verify a GENERATION observation was
    recorded with the prompt as input and the image parameters captured
    as model parameters.
    """
    generation_name = "test_image_generation_" + create_uuid()[:8]

    response = openai.OpenAI().images.generate(
        name=generation_name,
        model="dall-e-3",
        prompt="A white siamese cat",
        size="1024x1024",
        quality="standard",
        n=1,
        metadata={"test_key": "test_value"},
    )
    # The wrapper must pass the raw OpenAI response through to the caller.
    assert response is not None

    langfuse.flush()
    # Give the ingestion pipeline a moment before querying the API.
    sleep(1)

    generation = get_api().observations.get_many(
        name=generation_name, type="GENERATION"
    )

    assert len(generation.data) != 0
    generation_data = generation.data[0]
    assert generation_data.name == generation_name
    assert generation_data.metadata["test_key"] == "test_value"
    assert generation_data.input == "A white siamese cat"
    assert generation_data.type == "GENERATION"
    assert "dall-e-3" in generation_data.model
    assert generation_data.start_time is not None
    assert generation_data.end_time is not None
    assert generation_data.start_time < generation_data.end_time
    assert generation_data.output is not None
    # Check model parameters
    assert generation_data.model_parameters is not None
    assert generation_data.model_parameters.get("size") == "1024x1024"
    assert generation_data.model_parameters.get("quality") == "standard"


@pytest.mark.asyncio
async def test_openai_image_generation_async(openai):
    """Test that async image generation calls are tracked in Langfuse.

    Same flow as the sync variant but via AsyncOpenAI: generate an image,
    flush, then verify the observation exists with the expected input,
    metadata, and model.
    """
    generation_name = "test_image_generation_async_" + create_uuid()[:8]

    response = await openai.AsyncOpenAI().images.generate(
        name=generation_name,
        model="dall-e-3",
        prompt="A sunset over mountains",
        size="1024x1024",
        quality="standard",
        n=1,
        metadata={"async": True},
    )
    # The async wrapper must also return the raw OpenAI response.
    assert response is not None

    langfuse.flush()
    # Give the ingestion pipeline a moment before querying the API.
    sleep(1)

    generation = get_api().observations.get_many(
        name=generation_name, type="GENERATION"
    )

    assert len(generation.data) != 0
    generation_data = generation.data[0]
    assert generation_data.name == generation_name
    assert generation_data.metadata["async"] is True
    assert generation_data.input == "A sunset over mountains"
    assert generation_data.type == "GENERATION"
    assert "dall-e-3" in generation_data.model


@pytest.mark.skip(reason="Requires DALL-E 2 and image file setup; core tracking tested via generate()")
def test_openai_image_edit(openai):
    """Placeholder for Langfuse tracking of images.edit().

    images.edit() accepts a text ``prompt`` which the integration records
    as the observation input. Skipped: exercising it needs DALL-E 2 API
    access and a local PNG fixture; generate() already covers the
    tracking code path.
    """


@pytest.mark.skip(reason="Requires DALL-E 2 and image file setup; core tracking tested via generate()")
def test_openai_image_create_variation(openai):
    """Placeholder for Langfuse tracking of images.create_variation().

    Unlike generate()/edit(), create_variation() takes no ``prompt``
    argument — its only input is an image file — so the tracked text
    input is expected to be None. Skipped: needs DALL-E 2 API access and
    a PNG fixture; generate() already covers the tracking code path.
    """


@pytest.mark.skip(reason="Requires DALL-E 2 and image file setup; core tracking tested via generate()")
@pytest.mark.asyncio
async def test_openai_image_edit_async(openai):
    """Placeholder for Langfuse tracking of async images.edit().

    Skipped for the same reason as the sync variant: requires DALL-E 2
    access and an image fixture.
    """


@pytest.mark.skip(reason="Requires DALL-E 2 and image file setup; core tracking tested via generate()")
@pytest.mark.asyncio
async def test_openai_image_create_variation_async(openai):
    """Placeholder for Langfuse tracking of async images.create_variation().

    Skipped for the same reason as the sync variant: requires DALL-E 2
    access and an image fixture.
    """