Merged

80 commits
5c0d50d
support multimodal embedding
JohnJyong Oct 28, 2025
33ac54f
support multimodal embedding
JohnJyong Nov 5, 2025
070d826
support multimodal embedding
JohnJyong Nov 6, 2025
6a95b23
multimodal embedding
JohnJyong Nov 11, 2025
3b22d44
multimodal Embedding
JohnJyong Nov 12, 2025
98b25f2
multimodal Embedding
JohnJyong Nov 12, 2025
8e8d926
multimodal Embedding
JohnJyong Nov 12, 2025
779bed3
multimodal Embedding
JohnJyong Nov 12, 2025
aa94c68
multimodal Embedding LLM node
JohnJyong Nov 12, 2025
37e8050
multimodal Embedding LLM node
JohnJyong Nov 12, 2025
8d29b83
Merge branch 'main' into feat/support-multimodal-embedding
JohnJyong Nov 12, 2025
734378f
migration
JohnJyong Nov 12, 2025
14be2a8
multimodal embedding
JohnJyong Nov 12, 2025
3f61fc7
Merge branch 'main' into feat/support-multimodal-embedding
JohnJyong Nov 12, 2025
9bbb332
multimodal embedding
JohnJyong Nov 13, 2025
0456bae
multimodal embedding json schema
JohnJyong Nov 13, 2025
9350c22
multimodal embedding json schema
JohnJyong Nov 13, 2025
a3876c3
multimodal embedding update segment
JohnJyong Nov 13, 2025
3495358
multimodal embedding update segment
JohnJyong Nov 14, 2025
457cdf3
multimodal embedding update segment
JohnJyong Nov 14, 2025
f21b081
multimodal embedding update segment
JohnJyong Nov 14, 2025
462a3b3
multimodal embedding update segment
JohnJyong Nov 14, 2025
608a374
multimodal embedding update segment
JohnJyong Nov 14, 2025
2052cbc
multimodal embedding update segment
JohnJyong Nov 14, 2025
f51bfc5
multimodal embedding update segment
JohnJyong Nov 14, 2025
abd604f
multimodal embedding update segment
JohnJyong Nov 14, 2025
0583cc1
multimodal embedding update segment
JohnJyong Nov 17, 2025
997b9e3
multimodal embedding update segment
JohnJyong Nov 18, 2025
530120a
multimodal embedding update segment
JohnJyong Nov 21, 2025
5a6b8bb
multimodal embedding update segment
JohnJyong Nov 21, 2025
8dd6414
Merge branch 'main' into feat/support-multimodal-embedding
JohnJyong Nov 24, 2025
f41c6e8
multimodal embedding update segment
JohnJyong Nov 25, 2025
13ab371
Merge branch 'main' into feat/support-multimodal-embedding
JohnJyong Nov 25, 2025
c9fd47f
multimodal embedding update segment
JohnJyong Nov 25, 2025
464cf09
multimodal embedding update segment
JohnJyong Nov 26, 2025
687a07b
multimodal embedding update segment
JohnJyong Nov 26, 2025
7c80c3b
fix merge
JohnJyong Nov 27, 2025
8da8ed3
support urls
JohnJyong Nov 28, 2025
b3bcf03
support urls
JohnJyong Dec 2, 2025
a1dabdf
support urls
JohnJyong Dec 2, 2025
8eb8a25
Merge branch 'main' into feat/support-multimodal-embedding
JohnJyong Dec 2, 2025
438e2ff
support urls
JohnJyong Dec 2, 2025
7994629
support urls
JohnJyong Dec 2, 2025
8da2bc5
delete auto update_at for segments
JohnJyong Dec 3, 2025
cd33fd2
delete auto update_at for segments
JohnJyong Dec 3, 2025
f5fdfc3
chatbot fix
JohnJyong Dec 3, 2025
60d5b9c
fix download image issue
JohnJyong Dec 4, 2025
7e575a3
fix download image issue
JohnJyong Dec 4, 2025
793542f
lint fix
JohnJyong Dec 4, 2025
025c1b5
Merge branch 'main' into feat/support-multimodal-embedding
JohnJyong Dec 4, 2025
1d4858a
lint fix
JohnJyong Dec 4, 2025
27a6e5b
lint fix
JohnJyong Dec 4, 2025
02efe29
lint fix
JohnJyong Dec 4, 2025
7e0947d
[autofix.ci] apply automated fixes
autofix-ci[bot] Dec 4, 2025
d8c9e3a
[autofix.ci] apply automated fixes (attempt 2/3)
autofix-ci[bot] Dec 4, 2025
cc9df0f
lint fix
JohnJyong Dec 4, 2025
068da9f
Merge remote-tracking branch 'origin/feat/support-multimodal-embeddin…
JohnJyong Dec 4, 2025
d19d2c4
[autofix.ci] apply automated fixes
autofix-ci[bot] Dec 4, 2025
1df232e
lint fix
JohnJyong Dec 4, 2025
0937816
add env
JohnJyong Dec 5, 2025
9860971
add env
JohnJyong Dec 5, 2025
9056122
add env
JohnJyong Dec 5, 2025
9844532
[autofix.ci] apply automated fixes
autofix-ci[bot] Dec 5, 2025
718e08d
add env
JohnJyong Dec 5, 2025
3f5d08c
Merge remote-tracking branch 'origin/feat/support-multimodal-embeddin…
JohnJyong Dec 5, 2025
8744a07
[autofix.ci] apply automated fixes
autofix-ci[bot] Dec 5, 2025
13d7534
add env
JohnJyong Dec 5, 2025
0b6146d
Merge remote-tracking branch 'origin/feat/support-multimodal-embeddin…
JohnJyong Dec 5, 2025
697f2d4
fix mypy
JohnJyong Dec 5, 2025
33544eb
Merge branch 'main' into feat/support-multimodal-embedding
JohnJyong Dec 8, 2025
f75bf87
fix mypy
JohnJyong Dec 8, 2025
107933c
Update .env.example
JohnJyong Dec 8, 2025
6e5749d
Update .env.example
JohnJyong Dec 8, 2025
fde0b10
Update docker-compose.yaml
JohnJyong Dec 8, 2025
77c0768
fix mypy
JohnJyong Dec 8, 2025
02206dd
fix mypy
JohnJyong Dec 8, 2025
1919226
fix mypy
JohnJyong Dec 9, 2025
670fa1b
[autofix.ci] apply automated fixes
autofix-ci[bot] Dec 9, 2025
7731252
fix mypy
JohnJyong Dec 9, 2025
f03e5ed
Merge remote-tracking branch 'origin/feat/support-multimodal-embeddin…
JohnJyong Dec 9, 2025
20 changes: 20 additions & 0 deletions api/configs/feature/__init__.py
@@ -360,6 +360,26 @@ class FileUploadConfig(BaseSettings):
default=10,
)

IMAGE_FILE_BATCH_LIMIT: PositiveInt = Field(
description="Maximum number of files allowed in a image batch upload operation",
default=10,
)

SINGLE_CHUNK_ATTACHMENT_LIMIT: PositiveInt = Field(
description="Maximum number of files allowed in a single chunk attachment",
default=10,
)

ATTACHMENT_IMAGE_FILE_SIZE_LIMIT: NonNegativeInt = Field(
description="Maximum allowed image file size for attachments in megabytes",
default=2,
)

ATTACHMENT_IMAGE_DOWNLOAD_TIMEOUT: NonNegativeInt = Field(
description="Timeout for downloading image attachments in seconds",
default=60,
)

inner_UPLOAD_FILE_EXTENSION_BLACKLIST: str = Field(
description=(
"Comma-separated list of file extensions that are blocked from upload. "
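For context, the four new limits behave like the other FileUploadConfig settings and resolve from environment variables of the same name. The sketch below is a standalone illustration of that pydantic-settings pattern, not Dify code; the field names and defaults are taken from the diff above, everything else is hypothetical.

# Illustrative only: pydantic-settings reads each field from an env var of the
# same name and falls back to the default declared here.
from pydantic import Field, NonNegativeInt, PositiveInt
from pydantic_settings import BaseSettings

class AttachmentLimitsSketch(BaseSettings):
    IMAGE_FILE_BATCH_LIMIT: PositiveInt = Field(default=10)           # files per image batch upload
    SINGLE_CHUNK_ATTACHMENT_LIMIT: PositiveInt = Field(default=10)    # attachments per chunk
    ATTACHMENT_IMAGE_FILE_SIZE_LIMIT: NonNegativeInt = Field(default=2)    # megabytes
    ATTACHMENT_IMAGE_DOWNLOAD_TIMEOUT: NonNegativeInt = Field(default=60)  # seconds

# e.g. exporting ATTACHMENT_IMAGE_FILE_SIZE_LIMIT=5 makes
# AttachmentLimitsSketch().ATTACHMENT_IMAGE_FILE_SIZE_LIMIT evaluate to 5.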
6 changes: 3 additions & 3 deletions api/controllers/console/datasets/datasets.py
@@ -494,17 +494,17 @@ def patch(self, dataset_id):
args = parser.parse_args()
data = request.get_json()
current_user, current_tenant_id = current_account_with_tenant()

args["is_multimodal"] = False
# check embedding model setting
if (
data.get("indexing_technique") == "high_quality"
and data.get("embedding_model_provider") is not None
and data.get("embedding_model") is not None
):
DatasetService.check_embedding_model_setting(
is_multimodal = DatasetService.check_is_multimodal_model(
dataset.tenant_id, data.get("embedding_model_provider"), data.get("embedding_model")
)

args["is_multimodal"] = is_multimodal
# The role of the current user in the ta table must be admin, owner, editor, or dataset_operator
DatasetPermissionService.check_permission(
current_user, dataset, data.get("permission"), data.get("partial_member_list")
4 changes: 4 additions & 0 deletions api/controllers/console/datasets/datasets_document.py
@@ -447,6 +447,10 @@ def post(self):
model_type=ModelType.TEXT_EMBEDDING,
model=args["embedding_model"],
)
is_multimodal = DatasetService.check_is_multimodal_model(
current_tenant_id, args["embedding_model_provider"], args["embedding_model"]
)
knowledge_config.is_multimodal = is_multimodal
except InvokeAuthorizationError:
raise ProviderNotInitializeError(
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
2 changes: 2 additions & 0 deletions api/controllers/console/datasets/datasets_segments.py
@@ -251,6 +251,7 @@ def post(self, dataset_id, document_id):
.add_argument("content", type=str, required=True, nullable=False, location="json")
.add_argument("answer", type=str, required=False, nullable=True, location="json")
.add_argument("keywords", type=list, required=False, nullable=True, location="json")
.add_argument("attachment_ids", type=list, required=False, nullable=True, location="json")
)
args = parser.parse_args()
SegmentService.segment_create_args_validate(args, document)
@@ -321,6 +322,7 @@ def patch(self, dataset_id, document_id, segment_id):
.add_argument(
"regenerate_child_chunks", type=bool, required=False, nullable=True, default=False, location="json"
)
.add_argument("attachment_ids", type=list, required=False, nullable=True, location="json")
)
args = parser.parse_args()
SegmentService.segment_create_args_validate(args, document)
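The same attachment_ids list is accepted on both the segment create (POST) and update (PATCH) handlers shown above. Below is a hedged sketch of a create request whose payload keys mirror the reqparse arguments; the endpoint path, IDs, and token are placeholders, not the confirmed route.

# Hypothetical request against the console segments API (placeholder path/IDs/token).
import requests

payload = {
    "content": "Chunk text describing the attached product photo.",
    "keywords": ["product", "photo"],
    "attachment_ids": ["<uploaded-image-file-id>"],  # previously uploaded image file IDs
}
requests.post(
    "https://<console-host>/console/api/datasets/<dataset_id>/documents/<document_id>/segments",
    json=payload,
    headers={"Authorization": "Bearer <console-access-token>"},
    timeout=30,
)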
4 changes: 3 additions & 1 deletion api/controllers/console/datasets/hit_testing_base.py
@@ -50,7 +50,8 @@ def hit_testing_args_check(args):
def parse_args():
parser = (
reqparse.RequestParser()
.add_argument("query", type=str, location="json")
.add_argument("query", type=str, required=False, location="json")
.add_argument("attachment_ids", type=list, required=False, location="json")
.add_argument("retrieval_model", type=dict, required=False, location="json")
.add_argument("external_retrieval_model", type=dict, required=False, location="json")
)
@@ -66,6 +67,7 @@ def perform_hit_testing(dataset, args):
account=current_user,
retrieval_model=args["retrieval_model"],
external_retrieval_model=args["external_retrieval_model"],
attachment_ids=args["attachment_ids"],
limit=10,
)
return {"query": response["query"], "records": marshal(response["records"], hit_testing_record_fields)}
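Because query is now optional and attachment_ids is parsed alongside it, a retrieval test can be driven by an image as well as by text. A minimal sketch of such a request body follows; the retrieval_model contents and the file ID are illustrative assumptions, not values taken from this PR.

# Hypothetical request body for the dataset hit-testing endpoint; with query optional,
# the attachment can carry the search input on its own.
hit_testing_body = {
    "query": "",                                     # may be left empty for image-only testing
    "attachment_ids": ["<uploaded-image-file-id>"],  # image to embed and search with
    "retrieval_model": {"search_method": "semantic_search", "top_k": 10},  # illustrative settings
}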
2 changes: 2 additions & 0 deletions api/controllers/console/files.py
@@ -45,6 +45,8 @@ def get(self):
"video_file_size_limit": dify_config.UPLOAD_VIDEO_FILE_SIZE_LIMIT,
"audio_file_size_limit": dify_config.UPLOAD_AUDIO_FILE_SIZE_LIMIT,
"workflow_file_upload_limit": dify_config.WORKFLOW_FILE_UPLOAD_LIMIT,
"image_file_batch_limit": dify_config.IMAGE_FILE_BATCH_LIMIT,
"single_chunk_attachment_limit": dify_config.SINGLE_CHUNK_ATTACHMENT_LIMIT,
}, 200

@setup_required
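Clients that read this upload-config endpoint now receive two extra keys. A hedged client-side sketch follows; the URL is a placeholder, while the key names and defaults come from the dict above.

# Illustrative client check against the new limits returned by the endpoint above.
import requests

upload_config = requests.get(
    "https://<console-host>/console/api/files/upload",
    headers={"Authorization": "Bearer <console-access-token>"},
    timeout=30,
).json()

selected_images = ["photo-1.png", "photo-2.png"]  # files the client wants to send in one batch
batch_limit = upload_config.get("image_file_batch_limit", 10)
if len(selected_images) > batch_limit:
    raise ValueError(f"at most {batch_limit} images are allowed per batch upload")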
2 changes: 2 additions & 0 deletions api/core/app/apps/base_app_runner.py
@@ -83,6 +83,7 @@ def organize_prompt_messages(
context: str | None = None,
memory: TokenBufferMemory | None = None,
image_detail_config: ImagePromptMessageContent.DETAIL | None = None,
context_files: list["File"] | None = None,
) -> tuple[list[PromptMessage], list[str] | None]:
"""
Organize prompt messages
@@ -111,6 +112,7 @@
memory=memory,
model_config=model_config,
image_detail_config=image_detail_config,
context_files=context_files,
)
else:
memory_config = MemoryConfig(window=MemoryConfig.WindowConfig(enabled=False))
7 changes: 6 additions & 1 deletion api/core/app/apps/chat/app_runner.py
@@ -146,6 +146,7 @@ def run(

# get context from datasets
context = None
context_files = []
if app_config.dataset and app_config.dataset.dataset_ids:
hit_callback = DatasetIndexToolCallbackHandler(
queue_manager,
@@ -156,7 +157,7 @@
)

dataset_retrieval = DatasetRetrieval(application_generate_entity)
context = dataset_retrieval.retrieve(
context, context_files = dataset_retrieval.retrieve(
app_id=app_record.id,
user_id=application_generate_entity.user_id,
tenant_id=app_record.tenant_id,
@@ -171,6 +172,9 @@
memory=memory,
message_id=message.id,
inputs=inputs,
vision_enabled=application_generate_entity.app_config.app_model_config_dict.get("file_upload", {}).get(
"enabled", False
),
)

# reorganize all inputs and template to prompt messages
@@ -186,6 +190,7 @@
context=context,
memory=memory,
image_detail_config=image_detail_config,
context_files=context_files,
)

# check hosting moderation
7 changes: 6 additions & 1 deletion api/core/app/apps/completion/app_runner.py
@@ -102,6 +102,7 @@ def run(

# get context from datasets
context = None
context_files = []
if app_config.dataset and app_config.dataset.dataset_ids:
hit_callback = DatasetIndexToolCallbackHandler(
queue_manager,
@@ -116,7 +117,7 @@
query = inputs.get(dataset_config.retrieve_config.query_variable, "")

dataset_retrieval = DatasetRetrieval(application_generate_entity)
context = dataset_retrieval.retrieve(
context, context_files = dataset_retrieval.retrieve(
app_id=app_record.id,
user_id=application_generate_entity.user_id,
tenant_id=app_record.tenant_id,
@@ -130,6 +131,9 @@
hit_callback=hit_callback,
message_id=message.id,
inputs=inputs,
vision_enabled=application_generate_entity.app_config.app_model_config_dict.get("file_upload", {}).get(
"enabled", False
),
)

# reorganize all inputs and template to prompt messages
@@ -144,6 +148,7 @@
query=query,
context=context,
image_detail_config=image_detail_config,
context_files=context_files,
)

# check hosting moderation
4 changes: 2 additions & 2 deletions api/core/callback_handler/index_tool_callback_handler.py
@@ -7,7 +7,7 @@
from core.app.entities.app_invoke_entities import InvokeFrom
from core.app.entities.queue_entities import QueueRetrieverResourcesEvent
from core.rag.entities.citation_metadata import RetrievalSourceMetadata
from core.rag.index_processor.constant.index_type import IndexType
from core.rag.index_processor.constant.index_type import IndexStructureType
from core.rag.models.document import Document
from extensions.ext_database import db
from models.dataset import ChildChunk, DatasetQuery, DocumentSegment
@@ -59,7 +59,7 @@ def on_tool_end(self, documents: list[Document]):
document_id,
)
continue
if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX:
if dataset_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX:
child_chunk_stmt = select(ChildChunk).where(
ChildChunk.index_node_id == document.metadata["doc_id"],
ChildChunk.dataset_id == dataset_document.dataset_id,
64 changes: 52 additions & 12 deletions api/core/indexing_runner.py
@@ -7,7 +7,7 @@
import uuid
from typing import Any

from flask import current_app
from flask import Flask, current_app
from sqlalchemy import select
from sqlalchemy.orm.exc import ObjectDeletedError

@@ -21,7 +21,7 @@
from core.rag.docstore.dataset_docstore import DatasetDocumentStore
from core.rag.extractor.entity.datasource_type import DatasourceType
from core.rag.extractor.entity.extract_setting import ExtractSetting, NotionInfo, WebsiteInfo
from core.rag.index_processor.constant.index_type import IndexType
from core.rag.index_processor.constant.index_type import IndexStructureType
from core.rag.index_processor.index_processor_base import BaseIndexProcessor
from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
from core.rag.models.document import ChildDocument, Document
@@ -36,6 +36,7 @@
from extensions.ext_storage import storage
from libs import helper
from libs.datetime_utils import naive_utc_now
from models import Account
from models.dataset import ChildChunk, Dataset, DatasetProcessRule, DocumentSegment
from models.dataset import Document as DatasetDocument
from models.model import UploadFile
@@ -89,8 +90,17 @@ def run(self, dataset_documents: list[DatasetDocument]):
text_docs = self._extract(index_processor, requeried_document, processing_rule.to_dict())

# transform
current_user = db.session.query(Account).filter_by(id=requeried_document.created_by).first()
if not current_user:
raise ValueError("no current user found")
current_user.set_tenant_id(dataset.tenant_id)
documents = self._transform(
index_processor, dataset, text_docs, requeried_document.doc_language, processing_rule.to_dict()
index_processor,
dataset,
text_docs,
requeried_document.doc_language,
processing_rule.to_dict(),
current_user=current_user,
)
# save segment
self._load_segments(dataset, requeried_document, documents)
@@ -136,7 +146,7 @@ def run_in_splitting_status(self, dataset_document: DatasetDocument):

for document_segment in document_segments:
db.session.delete(document_segment)
if requeried_document.doc_form == IndexType.PARENT_CHILD_INDEX:
if requeried_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX:
# delete child chunks
db.session.query(ChildChunk).where(ChildChunk.segment_id == document_segment.id).delete()
db.session.commit()
@@ -152,8 +162,17 @@ def run_in_splitting_status(self, dataset_document: DatasetDocument):
text_docs = self._extract(index_processor, requeried_document, processing_rule.to_dict())

# transform
current_user = db.session.query(Account).filter_by(id=requeried_document.created_by).first()
if not current_user:
raise ValueError("no current user found")
current_user.set_tenant_id(dataset.tenant_id)
documents = self._transform(
index_processor, dataset, text_docs, requeried_document.doc_language, processing_rule.to_dict()
index_processor,
dataset,
text_docs,
requeried_document.doc_language,
processing_rule.to_dict(),
current_user=current_user,
)
# save segment
self._load_segments(dataset, requeried_document, documents)
@@ -209,7 +228,7 @@ def run_in_indexing_status(self, dataset_document: DatasetDocument):
"dataset_id": document_segment.dataset_id,
},
)
if requeried_document.doc_form == IndexType.PARENT_CHILD_INDEX:
if requeried_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX:
child_chunks = document_segment.get_child_chunks()
if child_chunks:
child_documents = []
@@ -302,6 +321,7 @@ def indexing_estimate(
text_docs = index_processor.extract(extract_setting, process_rule_mode=tmp_processing_rule["mode"])
documents = index_processor.transform(
text_docs,
current_user=None,
embedding_model_instance=embedding_model_instance,
process_rule=processing_rule.to_dict(),
tenant_id=tenant_id,
@@ -551,7 +571,10 @@ def _load(
indexing_start_at = time.perf_counter()
tokens = 0
create_keyword_thread = None
if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX and dataset.indexing_technique == "economy":
if (
dataset_document.doc_form != IndexStructureType.PARENT_CHILD_INDEX
and dataset.indexing_technique == "economy"
):
# create keyword index
create_keyword_thread = threading.Thread(
target=self._process_keyword_index,
@@ -590,7 +613,7 @@ def _load(
for future in futures:
tokens += future.result()
if (
dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX
dataset_document.doc_form != IndexStructureType.PARENT_CHILD_INDEX
and dataset.indexing_technique == "economy"
and create_keyword_thread is not None
):
@@ -635,7 +658,13 @@ def _process_keyword_index(flask_app, dataset_id, document_id, documents):
db.session.commit()

def _process_chunk(
self, flask_app, index_processor, chunk_documents, dataset, dataset_document, embedding_model_instance
self,
flask_app: Flask,
index_processor: BaseIndexProcessor,
chunk_documents: list[Document],
dataset: Dataset,
dataset_document: DatasetDocument,
embedding_model_instance: ModelInstance | None,
):
with flask_app.app_context():
# check document is paused
@@ -646,8 +675,15 @@ def _process_chunk(
page_content_list = [document.page_content for document in chunk_documents]
tokens += sum(embedding_model_instance.get_text_embedding_num_tokens(page_content_list))

multimodal_documents = []
for document in chunk_documents:
if document.attachments and dataset.is_multimodal:
multimodal_documents.extend(document.attachments)

# load index
index_processor.load(dataset, chunk_documents, with_keywords=False)
index_processor.load(
dataset, chunk_documents, multimodal_documents=multimodal_documents, with_keywords=False
)

document_ids = [document.metadata["doc_id"] for document in chunk_documents]
db.session.query(DocumentSegment).where(
@@ -710,6 +746,7 @@ def _transform(
text_docs: list[Document],
doc_language: str,
process_rule: dict,
current_user: Account | None = None,
) -> list[Document]:
# get embedding model instance
embedding_model_instance = None
@@ -729,6 +766,7 @@

documents = index_processor.transform(
text_docs,
current_user,
embedding_model_instance=embedding_model_instance,
process_rule=process_rule,
tenant_id=dataset.tenant_id,
@@ -737,14 +775,16 @@

return documents

def _load_segments(self, dataset, dataset_document, documents):
def _load_segments(self, dataset: Dataset, dataset_document: DatasetDocument, documents: list[Document]):
# save node to document segment
doc_store = DatasetDocumentStore(
dataset=dataset, user_id=dataset_document.created_by, document_id=dataset_document.id
)

# add document segments
doc_store.add_documents(docs=documents, save_child=dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX)
doc_store.add_documents(
docs=documents, save_child=dataset_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX
)

# update document status to indexing
cur_time = naive_utc_now()