-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathservice.py
More file actions
127 lines (108 loc) · 5.47 KB
/
Copy pathservice.py
File metadata and controls
127 lines (108 loc) · 5.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
from typing import AsyncGenerator
import json
import asyncio
from utils.llm_provider import get_llm_provider, _fix_streaming_chunk_spacing
from models import StoryRequest
# Initialize the AI provider (automatically selects based on your API keys)
llm_provider = get_llm_provider()
"""
What is Prompt Engineering?
- The art of writing instructions that guide AI to produce desired output
- Good prompts are: clear, specific, and structured
- This is where you teach the AI what you want
Key Prompt Engineering Techniques Used Here:
1. Role-Setting: Tell the AI who it is ("bedtime storyteller")
2. Context: Provide user inputs (character name, age, theme)
3. Instructions: Specify format, length, style
4. Constraints: Set boundaries (age-appropriate, specific word count)
💡 Try This: Modify this prompt and see how the story quality changes!
"""
def build_story_prompt(request: StoryRequest) -> str:
"""
Builds a well-structured prompt for the AI
This function takes user inputs and combines them into a clear
instruction that tells the AI exactly what kind of story to write.
"""
# Define length requirements - maps user selection to specific instructions
length_map = {
"short": "3-5 paragraphs, approximately 40-60 words",
"medium": "5-7 paragraphs, approximately 100-150 words",
"long": "8-12 paragraphs, approximately 200-300 words"
}
# Build the prompt step by step for clarity
prompt = f"""You are a creative and gentle bedtime storyteller.
Write a personalized bedtime story with these details:
- Main character: {request.character_name}, age {request.character_age}
- Theme: {request.story_theme}
- Length: {length_map.get(request.story_length, length_map['medium'])}
Requirements:
1. Start with an engaging title
2. Write in clear paragraphs with natural breaks
3. Make it age-appropriate for a {request.character_age}-year-old
4. End with a gentle moral lesson about kindness, bravery, or friendship
5. Keep the tone warm, comforting, and suitable for bedtime
Begin the story now:"""
return prompt
"""
What is Streaming?
- Instead of waiting for the entire story, send it piece by piece
- Creates a "typing" effect like ChatGPT
- Feels faster and more interactive to users
How Streaming Works:
1. AI generates text in small chunks (tokens)
2. Each chunk is sent immediately to the frontend
3. Frontend displays chunks as they arrive
4. User sees story appear in real-time
Technical Details:
- Uses async generators (async functions with 'yield')
- Formats as Server-Sent Events (SSE) with "data: {json}\n\n"
- Keeps connection alive until story is complete
"""
async def generate_story_stream(request: StoryRequest) -> AsyncGenerator[str, None]:
"""
Generates and streams a story in real-time
This is an async generator - it yields chunks of data as they're generated.
Each yield sends data to the frontend immediately.
"""
# Step 1: Notify frontend that connection is established
yield f"data: {json.dumps({'status': 'connected', 'message': 'Starting story generation...'})}\n\n"
# Step 1.1: Emit thinking events for the creative process
thinking_analysis = {'thinking': {'category': 'analysis', 'content': f'Developing a story for a {request.character_age}-year-old named {request.character_name} with the theme of {request.story_theme}...', 'timestamp': 'now'}}
yield f"data: {json.dumps(thinking_analysis)}\n\n"
await asyncio.sleep(0.5)
yield f"data: {json.dumps({'thinking': {'category': 'planning', 'content': 'Planning the narrative arc and moral lesson...', 'timestamp': 'now'}})}\n\n"
await asyncio.sleep(0.5)
yield f"data: {json.dumps({'thinking': {'category': 'processing', 'content': 'Weaving the tale together...', 'timestamp': 'now'}})}\n\n"
try:
# Step 2: Build the prompt from user inputs
prompt = build_story_prompt(request)
# Step 3: Stream the story content from the AI
# The AI generates text chunk by chunk, and we forward each chunk immediately
story_content = ""
try:
async for chunk in llm_provider.generate_stream(
prompt,
temperature=0.8, # Controls creativity (0.0 = deterministic, 1.0+ = creative)
max_tokens=800 # Limits story length
):
# Fix spacing issues in streaming chunks (e.g., "night,5-year-old" -> "night, 5-year-old")
chunk = _fix_streaming_chunk_spacing(chunk)
story_content += chunk
# Send each chunk to frontend as it arrives
yield f"data: {json.dumps({'content': chunk})}\n\n"
except (RuntimeError, StopIteration) as e:
# StopIteration and some RuntimeErrors indicate normal completion
# (some async frameworks convert StopIteration to RuntimeError)
error_str = str(e).lower()
if "stopiteration" in error_str or "async generator" in error_str:
# Generator finished normally - this is expected, not an error
pass
else:
# Other RuntimeError - re-raise as it's a real error
raise
# Step 4: Signal that generation is complete
yield f"data: {json.dumps({'done': True, 'status': 'completed'})}\n\n"
except Exception as e:
# Handle errors gracefully
error_message = f"Error generating story: {str(e)}"
yield f"data: {json.dumps({'error': error_message, 'status': 'error'})}\n\n"