-
Notifications
You must be signed in to change notification settings - Fork 235
Expand file tree
/
Copy pathpaginated_results.py
More file actions
159 lines (122 loc) · 5.1 KB
/
Copy pathpaginated_results.py
File metadata and controls
159 lines (122 loc) · 5.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
"""
Paginated Results -- LayerLens Python SDK Sample
================================================
Demonstrates two approaches for fetching evaluation results:

1. **Manual pagination** -- iterate page-by-page using
   ``client.results.get(evaluation=, page=, page_size=)``
   with full control over each request.
2. **Automatic pagination** -- fetch all results at once using
   ``client.results.get_all(evaluation=)`` which handles
   pagination internally.

It also shows ``client.results.get_by_id(evaluation_id=, page=, page_size=)``,
which takes an evaluation ID instead of an evaluation object.
Prerequisites
-------------
* ``pip install layerlens --index-url https://sdk.layerlens.ai/package``
* Set ``LAYERLENS_STRATIX_API_KEY`` environment variable
* At least one completed evaluation in the project
Usage
-----
::
export LAYERLENS_STRATIX_API_KEY=your-api-key
python paginated_results.py
"""
from __future__ import annotations
from layerlens import Stratix
def main() -> None:
    """Run the paginated-results walkthrough and print what it finds.

    Takes the first listed model and benchmark, creates an evaluation and
    waits for it, then retrieves the evaluation's results three ways:
    page by page with ``client.results.get``, a single page with
    ``client.results.get_by_id``, and everything in one call with
    ``client.results.get_all``.

    Returns early, after printing a message, when there is no model or
    benchmark, when the evaluation cannot be created, or when it does not
    finish successfully.
    """

    def banner(title: str) -> None:
        # Section header: blank line, rule, title, rule.
        rule = "=" * 60
        print("\n" + rule)
        print(title)
        print(rule)

    client = Stratix()

    # Something to evaluate: the first model and the first benchmark listed.
    available_models = client.models.get()
    available_benchmarks = client.benchmarks.get()
    if not (available_models and available_benchmarks):
        print("No models or benchmarks available. Add them first.")
        return

    model, benchmark = available_models[0], available_benchmarks[0]
    print(f"Using model: {model.name}")
    print(f"Using benchmark: {benchmark.name}")

    # Create the evaluation, then wait for it (10 s interval, 600 s timeout).
    evaluation = client.evaluations.create(model=model, benchmark=benchmark)
    if not evaluation:
        print("Failed to create evaluation")
        return
    print(f"Created evaluation {evaluation.id}, waiting for completion...")

    evaluation = client.evaluations.wait_for_completion(
        evaluation,
        interval_seconds=10,
        timeout_seconds=600,
    )
    print(f"Evaluation {evaluation.id} finished with status={evaluation.status}")
    if not evaluation.is_success:
        print("Evaluation did not succeed, no results to show.")
        return

    # Approach 1: request one page at a time and accumulate the results.
    banner("MANUAL PAGINATION")

    page_size = 50
    collected = []
    page = 1
    while True:
        print(f"Fetching page {page} (page_size={page_size})...")
        response = client.results.get(
            evaluation=evaluation,
            page=page,
            page_size=page_size,
        )

        # An empty response or an empty page both mean there is nothing left.
        batch = response.results if response else None
        if not batch:
            print("No more results to fetch")
            break

        collected.extend(batch)
        paging = response.pagination

        # The totals are reported once, from the first page's metadata.
        if page == 1:
            total_count = paging.total_count
            total_pages = paging.total_pages
            print(f"Total results: {total_count:,}")
            print(f"Total pages: {total_pages}")
        print(f"Page {page}: Retrieved {len(batch)} results (running total: {len(collected):,})")

        # Stop once the current page is the last one the API reports.
        if page >= paging.total_pages:
            print("Reached last page")
            break
        page += 1

    print(f"\nManual pagination complete: {len(collected):,} results collected")
    if collected:
        count = len(collected)
        scores = [item.score for item in collected]
        # A result counts as correct when its score is above 0.5.
        correct = sum(1 for score in scores if score > 0.5)
        accuracy = correct / count
        avg_score = sum(scores) / count
        print(f"Accuracy: {accuracy:.1%} ({correct:,}/{count:,})")
        print(f"Average score: {avg_score:.3f}")

        print("\nFirst 3 results:")
        for position, item in enumerate(collected[:3], start=1):
            print(f" {position}. Score: {item.score:.3f}, Subset: {item.subset}")
            print(f" Prompt: {item.prompt[:100]}...")
            print(f" Response: {item.result[:100]}...")

    # Alternative: address the evaluation by its ID instead of the object.
    banner("ALTERNATIVE: results.get_by_id(evaluation_id=...)")
    try:
        preview = client.results.get_by_id(
            evaluation_id=evaluation.id,
            page=1,
            page_size=10,
        )
        if preview and preview.results:
            returned = len(preview.results)
            total = preview.pagination.total_count
            print(f"get_by_id returned {returned} results (total: {total})")
        else:
            print("get_by_id returned no results")
    except Exception as exc:
        # Best effort: any failure here is reported and the sample continues.
        print(f"results.get_by_id() not available: {exc}")

    # Approach 2: let the SDK collect every result in a single call.
    banner("AUTOMATIC PAGINATION (get_all)")
    auto_results = client.results.get_all(evaluation=evaluation)
    print(f"Retrieved {len(auto_results)} results in one call")
    if auto_results:
        auto_scores = [item.score for item in auto_results]
        avg_score = sum(auto_scores) / len(auto_results)
        print(f"Average score: {avg_score:.3f}")

    print("\nDone.")


# Run the sample only when this file is executed as a script.
if __name__ == "__main__":
    main()