diff --git a/kag/common/text_sim_by_vector.py b/kag/common/text_sim_by_vector.py index 9f85af8a..d5ba8e74 100644 --- a/kag/common/text_sim_by_vector.py +++ b/kag/common/text_sim_by_vector.py @@ -61,7 +61,13 @@ def sentence_encode(self, sentences, is_cached=False): else: need_call_emb_text.append(text) if len(need_call_emb_text) > 0: - emb_res = self.vectorize_model.vectorize(need_call_emb_text) + if len(need_call_emb_text) <= 10: + emb_res = self.vectorize_model.vectorize(need_call_emb_text) + else: + emb_res = [] + for i in range((len(need_call_emb_text) + 9 )// 10): + emb_res_part = self.vectorize_model.vectorize(need_call_emb_text[i*10:(i+1)*10]) + emb_res += emb_res_part for text, text_emb in zip(need_call_emb_text, emb_res): tmp_map[text] = text_emb if is_cached: