|
15 | 15 | import logging |
16 | 16 | import numpy as np |
17 | 17 | import os |
| 18 | +import signal |
| 19 | +import six |
| 20 | +import sys |
18 | 21 | import time |
19 | 22 |
|
20 | 23 | from paddle_serving_client import Client |
@@ -53,7 +56,7 @@ def __init__(self, server_id, server, stop_event_id, state=PENDING): |
53 | 56 |
|
54 | 57 | def predict_manage_worker(process, server_queue, server_result_queue, |
55 | 58 | require_num, predict_stop_events, get_servers_fun, |
56 | | - stop_event): |
| 59 | + stop_event, predict_cond): |
57 | 60 | """ thread that manage predict worker """ |
58 | 61 | num_shutdown_process = [0] |
59 | 62 |
|
@@ -129,6 +132,34 @@ def shutdown_one_process(): |
129 | 132 | except queue.Empty: |
130 | 133 | pass |
131 | 134 |
|
| 135 | + def clean_queue(data_queue): |
| 136 | + while True: |
| 137 | + try: |
| 138 | + data_queue.get_nowait() |
| 139 | + except Exception: |
| 140 | + break |
| 141 | + |
| 142 | + clean_queue(server_queue) |
| 143 | + clean_queue(server_result_queue) |
| 144 | + |
| 145 | + with predict_cond: |
| 146 | + for predict_stop_event in predict_stop_events: |
| 147 | + predict_stop_event.set() |
| 148 | + predict_cond.notify_all() |
| 149 | + |
| 150 | + for i in range(require_num): |
| 151 | + shutdown_one_process() |
| 152 | + clean_queue(server_result_queue) |
| 153 | + |
| 154 | + for i in range(20): |
| 155 | + shutdown_process = 0 |
| 156 | + for p in process: |
| 157 | + if not p.is_alive(): |
| 158 | + shutdown_process += 1 |
| 159 | + if shutdown_process == len(process): |
| 160 | + break |
| 161 | + time.sleep(1) |
| 162 | + |
132 | 163 |
|
133 | 164 | class _PoisonPill: |
134 | 165 | def __init__(self, feed_count, predict_count=0): |
@@ -284,22 +315,38 @@ def __del__(self): |
284 | 315 | def predict_worker(server_queue, server_result_queue, working_predict_count, |
285 | 316 | in_queue, out_queue, feeds, fetchs, conf_file, stop_events, |
286 | 317 | predict_lock, global_finished_task, predict_cond): |
287 | | - while True: |
288 | | - # get server |
289 | | - server_item = server_queue.get() |
290 | | - if server_item is None: |
291 | | - server_queue.put(None) # poison_pill |
292 | | - return |
293 | | - |
294 | | - # predict |
295 | | - success = predict_loop(server_item, working_predict_count, in_queue, |
296 | | - out_queue, feeds, fetchs, conf_file, |
297 | | - stop_events, predict_lock, global_finished_task, |
298 | | - predict_cond) |
| 318 | + signal_exit = [False, ] |
| 319 | + |
| 320 | + # Define signal handler function |
| 321 | + def predict_signal_handle(signum, frame): |
| 322 | + signal_exit[0] = True |
| 323 | + exit(0) |
| 324 | + |
| 325 | + # register signal.SIGTERM's handler |
| 326 | + signal.signal(signal.SIGTERM, predict_signal_handle) |
| 327 | + |
| 328 | + try: |
| 329 | + while True: |
| 330 | + # get server |
| 331 | + server_item = server_queue.get() |
| 332 | + if server_item is None: |
| 333 | + server_result_queue.put(None) |
| 334 | + return |
299 | 335 |
|
300 | | - server_item.state = ServerItem.FINISHED if success else ServerItem.ERROR |
301 | | - server_result_queue.put(server_item) |
302 | | - logger.info('Stopped server={}'.format(server_item.server)) |
| 336 | + # predict |
| 337 | + success = predict_loop(server_item, working_predict_count, |
| 338 | + in_queue, out_queue, feeds, fetchs, |
| 339 | + conf_file, stop_events, predict_lock, |
| 340 | + global_finished_task, predict_cond) |
| 341 | + |
| 342 | + server_item.state = ServerItem.FINISHED if success else ServerItem.ERROR |
| 343 | + server_result_queue.put(server_item) |
| 344 | + logger.info('Stopped server={}'.format(server_item.server)) |
| 345 | + except Exception as e: |
| 346 | + if signal_exit[0] is True: |
| 347 | + pass |
| 348 | + else: |
| 349 | + six.reraise(*sys.exc_info()) |
303 | 350 |
|
304 | 351 |
|
305 | 352 | def predict_loop(server_item, working_predict_count, in_queue, out_queue, |
@@ -365,7 +412,8 @@ def predict_loop(server_item, working_predict_count, in_queue, out_queue, |
365 | 412 | out_queue.put(poison_pill) # poison consumer |
366 | 413 | else: |
367 | 414 | in_queue.put(poison_pill) # poison other predict worker |
368 | | - |
| 415 | + if stop_event.is_set(): |
| 416 | + break |
369 | 417 | # wait next reader iter or last failed predict job |
370 | 418 | predict_cond.wait() |
371 | 419 |
|
@@ -435,22 +483,40 @@ def reader_worker(reader, reader_type, teacher_batch_size, out_queue, |
435 | 483 | # consumer may recv out-of-order task(3, 1, 2) before task(0), consumer will store then, |
436 | 484 | # when task(0) is completed and consumer recv it, it will release semaphore, |
437 | 485 | # reader go on working. |
| 486 | + |
| 487 | + signal_exit = [False, ] |
| 488 | + |
| 489 | + def reader_signal_handle(signum, frame): |
| 490 | + signal_exit[0] = True |
| 491 | + exit(0) |
| 492 | + |
| 493 | + # register signal.SIGTERM's handler |
| 494 | + signal.signal(signal.SIGTERM, reader_signal_handle) |
| 495 | + |
438 | 496 | read_func_map = { |
439 | 497 | ReaderType.SAMPLE: read_sample, |
440 | 498 | ReaderType.SAMPLE_LIST: read_sample_list, |
441 | 499 | ReaderType.BATCH: read_batch |
442 | 500 | } |
443 | 501 | read_func = read_func_map[reader_type] |
444 | 502 |
|
445 | | - while not stop_event.is_set(): |
446 | | - task_size = read_func(reader, teacher_batch_size, out_queue, |
447 | | - task_semaphore) |
448 | | - |
449 | | - poison_pill = _PoisonPill(task_size) |
450 | | - with reader_cond: |
451 | | - out_queue.put(poison_pill) |
452 | | - # wait next reader iter |
453 | | - reader_cond.wait() |
| 503 | + try: |
| 504 | + while not stop_event.is_set(): |
| 505 | + task_size = read_func(reader, teacher_batch_size, out_queue, |
| 506 | + task_semaphore) |
| 507 | + |
| 508 | + poison_pill = _PoisonPill(task_size) |
| 509 | + with reader_cond: |
| 510 | + out_queue.put(poison_pill) |
| 511 | + if stop_event.is_set(): |
| 512 | + break |
| 513 | + # wait next reader iter |
| 514 | + reader_cond.wait() |
| 515 | + except Exception as e: |
| 516 | + if signal_exit[0] is True: |
| 517 | + pass |
| 518 | + else: |
| 519 | + six.reraise(*sys.exc_info()) |
454 | 520 |
|
455 | 521 |
|
456 | 522 | def read_sample(reader, teacher_batch_size, out_queue, task_semaphore): |
|
0 commit comments