Commit cc5183b

feat(e2e-test): test for model deletion steps (#7004)
* test for model deletion steps
* fix logger
* fix field logger
* fix(e2e-tests): model watcher dependency and feature file (#7005)
* fix model watcher dependency and context
* fix model ready
* refactor feature

Co-authored-by: Miguel Angel <[email protected]>
1 parent bfa38fb commit cc5183b

File tree

8 files changed, +266 -129 lines

tests/integration/godog/features/model/explicit_model_deployment.feature renamed to tests/integration/godog/features/model/custom_model_deployment.feature

Lines changed: 40 additions & 8 deletions
@@ -1,24 +1,25 @@
-@ModelDeployment @Functional @Models @Explicit
+@ModelDeployment @Functional @Models @CustomModelSpec
 Feature: Explicit Model deployment
   I deploy a custom model spec, wait for model to be deployed to the servers
-  and send an inference request to that model
+  and send an inference request to that model and expect a successful response.
+  I then delete the model and send inference requests and expect them to fail.
 
   Scenario: Load model and send inference request to envoy
-    Given I deploy model spec:
+    Given I deploy model spec with timeout "10s":
      """
      apiVersion: mlops.seldon.io/v1alpha1
      kind: Model
      metadata:
-       name: iris
+       name: alpha-1
      spec:
        replicas: 1
        requirements:
        - sklearn
        - mlserver
        storageUri: gs://seldon-models/scv2/samples/mlserver_1.3.5/iris-sklearn
      """
-    When the model "iris" should eventually become Ready with timeout "20s"
-    Then send HTTP inference request with timeout "20s" to model "iris" with payload:
+    When the model "alpha-1" should eventually become Ready with timeout "20s"
+    Then send HTTP inference request with timeout "20s" to model "alpha-1" with payload:
      """
      {
        "inputs": [
@@ -51,7 +52,7 @@ Feature: Explicit Model deployment
        }
      ] }
      """
-    Then send gRPC inference request with timeout "20s" to model "iris" with payload:
+    Then send gRPC inference request with timeout "20s" to model "alpha-1" with payload:
      """
      {
        "inputs": [
@@ -82,4 +83,35 @@ Feature: Explicit Model deployment
          "contents": {"int64_contents" : [2]}
        }
      ] }
-     """
+     """
+    Then delete the model "alpha-1" with timeout "10s"
+    Then send HTTP inference request with timeout "20s" to model "alpha-1" with payload:
+     """
+     {
+       "inputs": [
+         {
+           "name": "predict",
+           "shape": [1, 4],
+           "datatype": "FP32",
+           "data": [[1, 2, 3, 4]]
+         }
+       ]
+     }
+     """
+    And expect http response status code "404"
+    Then send gRPC inference request with timeout "20s" to model "alpha-1" with payload:
+     """
+     {
+       "inputs": [
+         {
+           "name": "predict",
+           "shape": [1, 4],
+           "datatype": "FP32",
+           "contents": {
+             "int64_contents" : [1, 2, 3, 4]
+           }
+         }
+       ]
+     }
+     """
+    And expect gRPC response error to contain "Unimplemented"
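
The post-deletion assertions are protocol-specific: the HTTP path through Envoy is expected to return a 404 once the model's route is gone, while the gRPC path surfaces an Unimplemented status. As a minimal sketch of how such a step could check the typed status code with the standard grpc-go status package (the helper name and its wiring into the suite are assumptions, not part of this commit):

	package steps

	import (
		"fmt"

		"google.golang.org/grpc/codes"
		"google.golang.org/grpc/status"
	)

	// expectGRPCCode is a hypothetical assertion helper: it unwraps the gRPC
	// status from a stored inference error and compares its code, e.g.
	// codes.Unimplemented after the model backing the route has been deleted.
	func expectGRPCCode(lastErr error, want codes.Code) error {
		st, ok := status.FromError(lastErr)
		if !ok {
			return fmt.Errorf("error %v does not carry a gRPC status", lastErr)
		}
		if st.Code() != want {
			return fmt.Errorf("got gRPC code %s, want %s", st.Code(), want)
		}
		return nil
	}

Matching on the typed code is stricter than the substring check this commit's step uses (`expect gRPC response error to contain "Unimplemented"`), but the substring form keeps the step reusable for non-status errors.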

tests/integration/godog/features/model/deployment.feature

Lines changed: 14 additions & 18 deletions
@@ -4,26 +4,31 @@ Feature: Model deployment
   As a model user
   I need to create a Model resource and verify it is deployed
 
-  @0
   Scenario: Success - Load a model
     Given I have an "iris" model
     When the model is applied
     Then the model should eventually become Ready
 
 
-  @0
   Scenario: Success - Load a model again
     Given I have an "iris" model
     When the model is applied
     Then the model should eventually become Ready
 
-  # this approach might be more reusable specially for complex test cases, its all how expressive we want to be
-  Scenario: Load model
-    Given I have a model:
+  Scenario: Load a specific model
+    Given I deploy model spec with timeout "10s":
      """
-
+     apiVersion: mlops.seldon.io/v1alpha1
+     kind: Model
+     metadata:
+       name: deployment-test-1
+     spec:
+       replicas: 1
+       requirements:
+       - sklearn
+       - mlserver
+       storageUri: gs://seldon-models/scv2/samples/mlserver_1.3.5/iris-sklearn
      """
-    When the model is applied
     Then the model should eventually become Ready
 
   Scenario: Success - Load a model and expect status model available
@@ -38,19 +43,10 @@ Feature: Model deployment
     When the model is applied
     Then the model should eventually become Ready
 
-
+  # todo: change model type
   Scenario: Success - Load a big model
-    Given I have an "large-model" model
+    Given I have an "iris" model
     When the model is applied
     Then the model should eventually become Ready
 
-  # this would belong more to the feature of model server scheduling or capabilities
-  Scenario: Fail Load Model - no server capabilities in cluster
-    Given Given I have an "iris" model
-    And the model has "xgboost" capabilities
-    And there is no server in the cluster with capabilities "xgboost"
-    When the model is applied
-    Then the model eventually becomes not Ready
-    And the model status message should eventually be "ModelFailed"
-
 

Lines changed: 115 additions & 0 deletions
@@ -0,0 +1,115 @@
+/*
+Copyright (c) 2024 Seldon Technologies Ltd.
+
+Use of this software is governed by
+(1) the license included in the LICENSE file or
+(2) if the license included in the LICENSE file is the Business Source License 1.1,
+the Change License after the Change Date as each is defined in accordance with the LICENSE file.
+*/
+
+package steps
+
+import (
+	"context"
+	"errors"
+	"fmt"
+
+	"github.com/seldonio/seldon-core/operator/v2/apis/mlops/v1alpha1"
+	k8serrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/watch"
+)
+
+// deleteModel waits for the model to be fully deleted: a finalizer is attached so the
+// scheduler can confirm the model has been unloaded from the inference pod, model-gw,
+// dataflow-engine and pipeline-gw; the controller then removes the finalizer so
+// deletion can complete.
+func (m *Model) deleteModel(ctx context.Context, model string) error {
+	modelCR, err := m.k8sClient.MlopsV1alpha1().Models(m.namespace).Get(ctx, model, metav1.GetOptions{})
+	if err != nil {
+		if k8serrors.IsNotFound(err) {
+			return fmt.Errorf("model %s can't be deleted, does not exist", model)
+		}
+		return fmt.Errorf("failed to get model %s: %w", model, err)
+	}
+
+	if err := m.k8sClient.MlopsV1alpha1().Models(m.namespace).Delete(ctx, model, metav1.DeleteOptions{}); err != nil {
+		return fmt.Errorf("failed deleting model: %w", err)
+	}
+
+	m.log.Debugf("Delete request for model %s sent", model)
+
+	// Watch from the resource version we read above so no events are missed.
+	watcher, err := m.k8sClient.MlopsV1alpha1().Models(m.namespace).Watch(ctx, metav1.ListOptions{
+		FieldSelector:   fmt.Sprintf("metadata.name=%s", model),
+		ResourceVersion: modelCR.ResourceVersion,
+	})
+	if err != nil {
+		return fmt.Errorf("failed watching model: %w", err)
+	}
+	defer watcher.Stop()
+
+	m.log.Debugf("Waiting for %s model deletion confirmation", model)
+
+	for {
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		case event, ok := <-watcher.ResultChan():
+			if !ok {
+				return errors.New("watcher channel closed")
+			}
+			if event.Type == watch.Error {
+				return fmt.Errorf("watch error: %v", event.Object)
+			}
+			if event.Type == watch.Deleted {
+				return nil
+			}
+		}
+	}
+}
+
+func (m *Model) waitForModelReady(ctx context.Context, model string) error {
+	foundModel, err := m.k8sClient.MlopsV1alpha1().Models(m.namespace).Get(ctx, model, metav1.GetOptions{})
+	if err != nil {
+		return fmt.Errorf("failed getting model: %w", err)
+	}
+
+	if foundModel.Status.IsReady() {
+		return nil
+	}
+
+	watcher, err := m.k8sClient.MlopsV1alpha1().Models(m.namespace).Watch(ctx, metav1.ListOptions{
+		FieldSelector:   fmt.Sprintf("metadata.name=%s", model),
+		ResourceVersion: foundModel.ResourceVersion,
+		Watch:           true,
+	})
+	if err != nil {
+		return fmt.Errorf("failed subscribing to watch model: %w", err)
+	}
+	defer watcher.Stop()
+
+	for {
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		case event, ok := <-watcher.ResultChan():
+			if !ok {
+				return errors.New("watch channel closed")
+			}
+
+			if event.Type == watch.Error {
+				return fmt.Errorf("watch error: %v", event.Object)
+			}
+
+			if event.Type == watch.Added || event.Type == watch.Modified {
+				model := event.Object.(*v1alpha1.Model)
+				if model.Status.IsReady() {
+					return nil
+				}
+			}
+
+			if event.Type == watch.Deleted {
+				return errors.New("resource was deleted")
+			}
+		}
+	}
+}
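
These helpers only take effect once they are bound to Gherkin step expressions. A minimal sketch of how the deletion step might be registered with godog, reusing `withTimeoutCtx` from infer.go to bound the finalizer wait (the registration function name and step pattern here are assumptions; the real wiring lives elsewhere in the suite):

	package steps

	import (
		"context"

		"github.com/cucumber/godog"
	)

	// registerModelSteps is a hypothetical sketch: it binds deleteModel to the
	// `delete the model ... with timeout ...` step used in the feature files.
	func registerModelSteps(sc *godog.ScenarioContext, m *Model) {
		sc.Step(`^delete the model "([^"]*)" with timeout "([^"]*)"$`,
			func(model, timeout string) error {
				// Parse the step's timeout into a context so the watch on the
				// model's finalizer cannot block the scenario indefinitely.
				return withTimeoutCtx(timeout, func(ctx context.Context) error {
					return m.deleteModel(ctx, model)
				})
			})
	}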

tests/integration/godog/steps/explicit_model_steps.go

Lines changed: 0 additions & 63 deletions
This file was deleted.

tests/integration/godog/steps/infer.go

Lines changed: 28 additions & 7 deletions
@@ -61,23 +61,32 @@ func (i *inference) sendGRPCModelInferenceRequest(ctx context.Context, model str
 	ctx = metadata.NewOutgoingContext(context.Background(), md)
 	resp, err := i.grpc.ModelInfer(ctx, msg)
 	if err != nil {
-		return fmt.Errorf("could not send grpc model inference: %w", err)
+		i.lastGRPCResponse.err = err
 	}
 
-	i.lastGRPCResponse = resp
+	i.lastGRPCResponse.response = resp
 	return nil
 }
 
 func withTimeoutCtx(timeout string, callback func(ctx context.Context) error) error {
-	timeoutDuration, err := time.ParseDuration(timeout)
+	ctx, cancel, err := timeoutToContext(timeout)
 	if err != nil {
-		return fmt.Errorf("invalid timeout %s: %w", timeout, err)
+		return err
 	}
-	ctx, cancel := context.WithTimeout(context.Background(), timeoutDuration)
 	defer cancel()
 	return callback(ctx)
 }
 
+func timeoutToContext(timeout string) (context.Context, context.CancelFunc, error) {
+	d, err := time.ParseDuration(timeout)
+	if err != nil {
+		return nil, nil, fmt.Errorf("invalid timeout %s: %w", timeout, err)
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), d)
+	return ctx, cancel, nil
+}
+
 func isSubset(needle, hay any) bool {
 	nObj, nOK := needle.(map[string]any)
 	hObj, hOK := hay.(map[string]any)
@@ -126,12 +135,24 @@ func jsonContainsObjectSubset(jsonStr, needleStr string) (bool, error) {
 	return containsSubset(needle, hay), nil
 }
 
+func (i *inference) gRPCRespContainsError(err string) error {
+	if i.lastGRPCResponse.err == nil {
+		return errors.New("no gRPC response error found")
+	}
+
+	if strings.Contains(i.lastGRPCResponse.err.Error(), err) {
+		return nil
+	}
+
+	return fmt.Errorf("error %s does not contain %s", i.lastGRPCResponse.err.Error(), err)
+}
+
 func (i *inference) gRPCRespCheckBodyContainsJSON(expectJSON *godog.DocString) error {
-	if i.lastGRPCResponse == nil {
+	if i.lastGRPCResponse.response == nil {
 		return errors.New("no gRPC response found")
 	}
 
-	gotJson, err := json.Marshal(i.lastGRPCResponse)
+	gotJson, err := json.Marshal(i.lastGRPCResponse.response)
 	if err != nil {
 		return fmt.Errorf("could not marshal gRPC json: %w", err)
 	}
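
The switch from assigning `i.lastGRPCResponse = resp` to `i.lastGRPCResponse.response = resp` implies `lastGRPCResponse` is no longer the raw response pointer but a small struct that records the error alongside it, so an inference call that fails (as expected after deletion) can still be asserted on by `gRPCRespContainsError`. The struct definition itself is not in this diff; a plausible shape, stated as an assumption:

	package steps

	// grpcResult is an assumed shape for the inference struct's lastGRPCResponse
	// field; the real definition lives elsewhere in the package. Keeping err next
	// to the response lets sendGRPCModelInferenceRequest return nil on failure and
	// defer the assertion to a later step.
	type grpcResult struct {
		response any // in practice, the V2 dataplane *ModelInferResponse
		err      error
	}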
