From f438bb368855fb57d9226a1573f3e58efcb72b99 Mon Sep 17 00:00:00 2001
From: Cole Tebou
Date: Wed, 4 Feb 2026 23:13:08 +0000
Subject: [PATCH] feat(openai): add support for image generation tracking

Adds automatic tracing for OpenAI image generation API calls:

- images.generate()
- images.edit()
- images.create_variation()

Changes:

- Add OpenAiDefinition entries for the image methods (sync/async)
- Extract image parameters (size, quality, style, response_format, n)
- Parse image responses (URLs and base64 wrapped in LangfuseMedia)
- Track image count as a usage metric
- Handle create_variation(), which has no prompt parameter (input=None)

Tests:

- Add tests for sync/async images.generate()
- Add skipped stubs for edit()/create_variation() (require DALL-E 2 + image files)
---
 langfuse/openai.py   | 166 ++++++++++++++++++++++++++++++++++++++++++-
 tests/test_openai.py | 100 ++++++++++++++++++++++++++
 2 files changed, 263 insertions(+), 3 deletions(-)

diff --git a/langfuse/openai.py b/langfuse/openai.py
index 92d9346f4..668b185d9 100644
--- a/langfuse/openai.py
+++ b/langfuse/openai.py
@@ -90,6 +90,48 @@ class OpenAiDefinition:
         type="chat",
         sync=True,
     ),
+    OpenAiDefinition(
+        module="openai.resources.images",
+        object="Images",
+        method="generate",
+        type="image",
+        sync=True,
+    ),
+    OpenAiDefinition(
+        module="openai.resources.images",
+        object="AsyncImages",
+        method="generate",
+        type="image",
+        sync=False,
+    ),
+    OpenAiDefinition(
+        module="openai.resources.images",
+        object="Images",
+        method="edit",
+        type="image",
+        sync=True,
+    ),
+    OpenAiDefinition(
+        module="openai.resources.images",
+        object="AsyncImages",
+        method="edit",
+        type="image",
+        sync=False,
+    ),
+    OpenAiDefinition(
+        module="openai.resources.images",
+        object="Images",
+        method="create_variation",
+        type="image",
+        sync=True,
+    ),
+    OpenAiDefinition(
+        module="openai.resources.images",
+        object="AsyncImages",
+        method="create_variation",
+        type="image",
+        sync=False,
+    ),
     OpenAiDefinition(
         module="openai.resources.completions",
         object="Completions",
@@ -354,9 +396,12 @@ def _extract_chat_response(kwargs: Any) -> Any:
 
 def _get_langfuse_data_from_kwargs(resource: OpenAiDefinition, kwargs: Any) -> Any:
-    default_name = (
-        "OpenAI-embedding" if resource.type == "embedding" else "OpenAI-generation"
-    )
+    if resource.type == "embedding":
+        default_name = "OpenAI-embedding"
+    elif resource.type == "image":
+        default_name = "OpenAI-image"
+    else:
+        default_name = "OpenAI-generation"
 
     name = kwargs.get("name", default_name)
 
     if name is None:
@@ -417,6 +462,12 @@ def _get_langfuse_data_from_kwargs(resource: OpenAiDefinition, kwargs: Any) -> A
         prompt = _extract_chat_prompt(kwargs)
     elif resource.type == "embedding":
         prompt = kwargs.get("input", None)
+    elif resource.type == "image":
+        # generate() and edit() accept prompt, but create_variation() does not
+        if resource.method in ["generate", "edit"]:
+            prompt = kwargs.get("prompt", None)
+        else:
+            prompt = None  # create_variation uses image input, not a text prompt
 
     parsed_temperature = (
         kwargs.get("temperature", 1)
@@ -479,6 +530,44 @@ def _get_langfuse_data_from_kwargs(resource: OpenAiDefinition, kwargs: Any) -> A
             modelParameters["dimensions"] = parsed_dimensions
         if parsed_encoding_format != "float":
             modelParameters["encoding_format"] = parsed_encoding_format
+    elif resource.type == "image":
+        # Image generation parameters
+        modelParameters = {}
+
+        parsed_size = (
+            kwargs.get("size", None)
+            if not isinstance(kwargs.get("size", None), NotGiven)
+            else None
+        )
+        if parsed_size is not None:
+            modelParameters["size"] = parsed_size
+
+        parsed_quality = (
+            kwargs.get("quality", None)
+            if not isinstance(kwargs.get("quality", None), NotGiven)
+            else None
+        )
+        if parsed_quality is not None:
+            modelParameters["quality"] = parsed_quality
+
+        parsed_style = (
+            kwargs.get("style", None)
+            if not isinstance(kwargs.get("style", None), NotGiven)
+            else None
+        )
+        if parsed_style is not None:
+            modelParameters["style"] = parsed_style
+
+        parsed_response_format = (
+            kwargs.get("response_format", None)
+            if not isinstance(kwargs.get("response_format", None), NotGiven)
+            else None
+        )
+        if parsed_response_format is not None:
+            modelParameters["response_format"] = parsed_response_format
+
+        # parsed_n is parsed once above for all resource types; record only non-default values
+        if parsed_n is not None and isinstance(parsed_n, int) and parsed_n > 1:
+            modelParameters["n"] = parsed_n
     else:
         modelParameters = {
             "temperature": parsed_temperature,
@@ -791,6 +880,33 @@ def _get_langfuse_data_from_default_response(
             "count": len(data),
         }
 
+    elif resource.type == "image":
+        data = response.get("data", [])
+        completion = []
+        for item in data:
+            image_data = item.__dict__ if hasattr(item, "__dict__") else item
+            image_result = {}
+
+            # Handle URL responses
+            if image_data.get("url"):
+                image_result["url"] = image_data["url"]
+
+            # Handle base64 responses
+            if image_data.get("b64_json"):
+                # Wrap in LangfuseMedia for proper media handling
+                base64_data_uri = f"data:image/png;base64,{image_data['b64_json']}"
+                image_result["image"] = LangfuseMedia(base64_data_uri=base64_data_uri)
+
+            # Include revised_prompt if present (DALL-E 3)
+            if image_data.get("revised_prompt"):
+                image_result["revised_prompt"] = image_data["revised_prompt"]
+
+            completion.append(image_result)
+
+        # If only one image was returned, unwrap it from the list
+        if len(completion) == 1:
+            completion = completion[0]
+
     usage = _parse_usage(response.get("usage", None))
 
     return (model, completion, usage)
@@ -842,6 +958,28 @@ def _wrap(
     try:
         openai_response = wrapped(**arg_extractor.get_openai_args())
 
+        # Handle image generation (non-streaming)
+        if open_ai_resource.type == "image":
+            model, completion, usage = _get_langfuse_data_from_default_response(
+                open_ai_resource,
+                (openai_response and openai_response.__dict__)
+                if _is_openai_v1()
+                else openai_response,
+            )
+
+            # Track the number of generated images as output usage
+            image_count = 1
+            if isinstance(completion, list):
+                image_count = len(completion)
+
+            # usage_details values must be integers (counts, not token details)
+            generation.update(
+                model=model,
+                output=completion,
+                usage_details={"output": image_count, "total": image_count},
+            ).end()
+
+            return openai_response
+
         if _is_streaming_response(openai_response):
             return LangfuseResponseGeneratorSync(
                 resource=open_ai_resource,
@@ -913,6 +1051,28 @@ async def _wrap_async(
     try:
         openai_response = await wrapped(**arg_extractor.get_openai_args())
 
+        # Handle image generation (non-streaming)
+        if open_ai_resource.type == "image":
+            model, completion, usage = _get_langfuse_data_from_default_response(
+                open_ai_resource,
+                (openai_response and openai_response.__dict__)
+                if _is_openai_v1()
+                else openai_response,
+            )
+
+            # Track the number of generated images as output usage
+            image_count = 1
+            if isinstance(completion, list):
+                image_count = len(completion)
+
+            # usage_details values must be integers (counts, not token details)
+            generation.update(
+                model=model,
+                output=completion,
+                usage_details={"output": image_count, "total": image_count},
+            ).end()
+
+            return openai_response
+
         if _is_streaming_response(openai_response):
             return LangfuseResponseGeneratorAsync(
                 resource=open_ai_resource,
diff --git a/tests/test_openai.py b/tests/test_openai.py
index f24bf93cf..1d82ccfd3 100644
--- a/tests/test_openai.py
+++ b/tests/test_openai.py
@@ -1594,3 +1594,103 @@ async def test_async_openai_embeddings(openai):
     assert embedding_data.metadata["async"] is True
     assert embedding_data.usage.input is not None
     assert embedding_data.usage.total is not None
+
+
+def test_openai_image_generation(openai):
+    """Test that image generation calls are tracked in Langfuse."""
+    generation_name = "test_image_generation_" + create_uuid()[:8]
+
+    response = openai.OpenAI().images.generate(
+        name=generation_name,
+        model="dall-e-3",
+        prompt="A white siamese cat",
+        size="1024x1024",
+        quality="standard",
+        n=1,
+        metadata={"test_key": "test_value"},
+    )
+
+    langfuse.flush()
+    sleep(1)
+
+    generation = get_api().observations.get_many(name=generation_name, type="GENERATION")
+
+    assert len(generation.data) != 0
+    generation_data = generation.data[0]
+    assert generation_data.name == generation_name
+    assert generation_data.metadata["test_key"] == "test_value"
+    assert generation_data.input == "A white siamese cat"
+    assert generation_data.type == "GENERATION"
+    assert "dall-e-3" in generation_data.model
+    assert generation_data.start_time is not None
+    assert generation_data.end_time is not None
+    assert generation_data.start_time < generation_data.end_time
+    assert generation_data.output is not None
+    # Check model parameters
+    assert generation_data.model_parameters is not None
+    assert generation_data.model_parameters.get("size") == "1024x1024"
+    assert generation_data.model_parameters.get("quality") == "standard"
+
+
+@pytest.mark.asyncio
+async def test_openai_image_generation_async(openai):
+    """Test that async image generation calls are tracked in Langfuse."""
+    generation_name = "test_image_generation_async_" + create_uuid()[:8]
+
+    response = await openai.AsyncOpenAI().images.generate(
+        name=generation_name,
+        model="dall-e-3",
+        prompt="A sunset over mountains",
+        size="1024x1024",
+        quality="standard",
+        n=1,
+        metadata={"async": True},
+    )
+
+    langfuse.flush()
+    sleep(1)
+
+    generation = get_api().observations.get_many(name=generation_name, type="GENERATION")
+
+    assert len(generation.data) != 0
+    generation_data = generation.data[0]
+    assert generation_data.name == generation_name
+    assert generation_data.metadata["async"] is True
+    assert generation_data.input == "A sunset over mountains"
+    assert generation_data.type == "GENERATION"
+    assert "dall-e-3" in generation_data.model
+
+
+@pytest.mark.skip(reason="Requires DALL-E 2 and image file setup; core tracking tested via generate()")
+def test_openai_image_edit(openai):
+    """Test that image edit calls are tracked in Langfuse.
+
+    Note: images.edit() accepts a prompt parameter, which is tracked as input.
+    Skipped by default as it requires DALL-E 2 API access and PNG image files.
+    """
+    pass
+
+
+@pytest.mark.skip(reason="Requires DALL-E 2 and image file setup; core tracking tested via generate()")
+def test_openai_image_create_variation(openai):
+    """Test that image variation calls are tracked in Langfuse.
+
+    Note: images.create_variation() does NOT accept a prompt parameter,
+    so input will be None. This is expected behavior - the method only
+    takes an image file as input, which is not captured as text input.
+    """
+    pass
+
+
+@pytest.mark.skip(reason="Requires DALL-E 2 and image file setup; core tracking tested via generate()")
+@pytest.mark.asyncio
+async def test_openai_image_edit_async(openai):
+    """Test that async image edit calls are tracked in Langfuse."""
+    pass
+
+
+@pytest.mark.skip(reason="Requires DALL-E 2 and image file setup; core tracking tested via generate()")
+@pytest.mark.asyncio
+async def test_openai_image_create_variation_async(openai):
+    """Test that async image variation calls are tracked in Langfuse."""
+    pass
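
---

Usage sketch (reviewer note, not part of the patch): with this change, image
calls made through the Langfuse drop-in client should be traced automatically,
with no tracing code in the application. A minimal example, assuming
OPENAI_API_KEY and the Langfuse credentials are set in the environment:

    # Drop-in replacement: import the instrumented client from langfuse
    from langfuse.openai import OpenAI

    client = OpenAI()

    # Traced as an "OpenAI-image" generation: the prompt is recorded as input,
    # size/quality as model parameters, and the image count as output usage.
    image = client.images.generate(
        model="dall-e-3",
        prompt="A white siamese cat",
        size="1024x1024",
        quality="standard",
        n=1,
    )
    print(image.data[0].url)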