Merged

80 commits
5c0d50d
support multimodal embedding
JohnJyong Oct 28, 2025
33ac54f
support multimodal embedding
JohnJyong Nov 5, 2025
070d826
support multimodal embedding
JohnJyong Nov 6, 2025
6a95b23
multimodal embedding
JohnJyong Nov 11, 2025
3b22d44
multimodal Embedding
JohnJyong Nov 12, 2025
98b25f2
multimodal Embedding
JohnJyong Nov 12, 2025
8e8d926
multimodal Embedding
JohnJyong Nov 12, 2025
779bed3
multimodal Embedding
JohnJyong Nov 12, 2025
aa94c68
multimodal Embedding LLM node
JohnJyong Nov 12, 2025
37e8050
multimodal Embedding LLM node
JohnJyong Nov 12, 2025
8d29b83
Merge branch 'main' into feat/support-multimodal-embedding
JohnJyong Nov 12, 2025
734378f
migration
JohnJyong Nov 12, 2025
14be2a8
multimodal embedding
JohnJyong Nov 12, 2025
3f61fc7
Merge branch 'main' into feat/support-multimodal-embedding
JohnJyong Nov 12, 2025
9bbb332
multimodal embedding
JohnJyong Nov 13, 2025
0456bae
multimodal embedding json schema
JohnJyong Nov 13, 2025
9350c22
multimodal embedding json schema
JohnJyong Nov 13, 2025
a3876c3
multimodal embedding update segment
JohnJyong Nov 13, 2025
3495358
multimodal embedding update segment
JohnJyong Nov 14, 2025
457cdf3
multimodal embedding update segment
JohnJyong Nov 14, 2025
f21b081
multimodal embedding update segment
JohnJyong Nov 14, 2025
462a3b3
multimodal embedding update segment
JohnJyong Nov 14, 2025
608a374
multimodal embedding update segment
JohnJyong Nov 14, 2025
2052cbc
multimodal embedding update segment
JohnJyong Nov 14, 2025
f51bfc5
multimodal embedding update segment
JohnJyong Nov 14, 2025
abd604f
multimodal embedding update segment
JohnJyong Nov 14, 2025
0583cc1
multimodal embedding update segment
JohnJyong Nov 17, 2025
997b9e3
multimodal embedding update segment
JohnJyong Nov 18, 2025
530120a
multimodal embedding update segment
JohnJyong Nov 21, 2025
5a6b8bb
multimodal embedding update segment
JohnJyong Nov 21, 2025
8dd6414
Merge branch 'main' into feat/support-multimodal-embedding
JohnJyong Nov 24, 2025
f41c6e8
multimodal embedding update segment
JohnJyong Nov 25, 2025
13ab371
Merge branch 'main' into feat/support-multimodal-embedding
JohnJyong Nov 25, 2025
c9fd47f
multimodal embedding update segment
JohnJyong Nov 25, 2025
464cf09
multimodal embedding update segment
JohnJyong Nov 26, 2025
687a07b
multimodal embedding update segment
JohnJyong Nov 26, 2025
7c80c3b
fix merge
JohnJyong Nov 27, 2025
8da8ed3
support urls
JohnJyong Nov 28, 2025
b3bcf03
support urls
JohnJyong Dec 2, 2025
a1dabdf
support urls
JohnJyong Dec 2, 2025
8eb8a25
Merge branch 'main' into feat/support-multimodal-embedding
JohnJyong Dec 2, 2025
438e2ff
support urls
JohnJyong Dec 2, 2025
7994629
support urls
JohnJyong Dec 2, 2025
8da2bc5
delete auto update_at for segments
JohnJyong Dec 3, 2025
cd33fd2
delete auto update_at for segments
JohnJyong Dec 3, 2025
f5fdfc3
chatbot fix
JohnJyong Dec 3, 2025
60d5b9c
fix download image issue
JohnJyong Dec 4, 2025
7e575a3
fix download image issue
JohnJyong Dec 4, 2025
793542f
lint fix
JohnJyong Dec 4, 2025
025c1b5
Merge branch 'main' into feat/support-multimodal-embedding
JohnJyong Dec 4, 2025
1d4858a
lint fix
JohnJyong Dec 4, 2025
27a6e5b
lint fix
JohnJyong Dec 4, 2025
02efe29
lint fix
JohnJyong Dec 4, 2025
7e0947d
[autofix.ci] apply automated fixes
autofix-ci[bot] Dec 4, 2025
d8c9e3a
[autofix.ci] apply automated fixes (attempt 2/3)
autofix-ci[bot] Dec 4, 2025
cc9df0f
lint fix
JohnJyong Dec 4, 2025
068da9f
Merge remote-tracking branch 'origin/feat/support-multimodal-embeddin…
JohnJyong Dec 4, 2025
d19d2c4
[autofix.ci] apply automated fixes
autofix-ci[bot] Dec 4, 2025
1df232e
lint fix
JohnJyong Dec 4, 2025
0937816
add env
JohnJyong Dec 5, 2025
9860971
add env
JohnJyong Dec 5, 2025
9056122
add env
JohnJyong Dec 5, 2025
9844532
[autofix.ci] apply automated fixes
autofix-ci[bot] Dec 5, 2025
718e08d
add env
JohnJyong Dec 5, 2025
3f5d08c
Merge remote-tracking branch 'origin/feat/support-multimodal-embeddin…
JohnJyong Dec 5, 2025
8744a07
[autofix.ci] apply automated fixes
autofix-ci[bot] Dec 5, 2025
13d7534
add env
JohnJyong Dec 5, 2025
0b6146d
Merge remote-tracking branch 'origin/feat/support-multimodal-embeddin…
JohnJyong Dec 5, 2025
697f2d4
fix mypy
JohnJyong Dec 5, 2025
33544eb
Merge branch 'main' into feat/support-multimodal-embedding
JohnJyong Dec 8, 2025
f75bf87
fix mypy
JohnJyong Dec 8, 2025
107933c
Update .env.example
JohnJyong Dec 8, 2025
6e5749d
Update .env.example
JohnJyong Dec 8, 2025
fde0b10
Update docker-compose.yaml
JohnJyong Dec 8, 2025
77c0768
fix mypy
JohnJyong Dec 8, 2025
02206dd
fix mypy
JohnJyong Dec 8, 2025
1919226
fix mypy
JohnJyong Dec 9, 2025
670fa1b
[autofix.ci] apply automated fixes
autofix-ci[bot] Dec 9, 2025
7731252
fix mypy
JohnJyong Dec 9, 2025
f03e5ed
Merge remote-tracking branch 'origin/feat/support-multimodal-embeddin…
JohnJyong Dec 9, 2025
20 changes: 20 additions & 0 deletions api/configs/feature/__init__.py
@@ -360,6 +360,26 @@ class FileUploadConfig(BaseSettings):
default=10,
)

IMAGE_FILE_BATCH_LIMIT: PositiveInt = Field(
description="Maximum number of files allowed in a image batch upload operation",
default=10,
)

SINGLE_CHUNK_ATTACHMENT_LIMIT: PositiveInt = Field(
description="Maximum number of files allowed in a single chunk attachment",
default=10,
)

ATTACHMENT_IMAGE_FILE_SIZE_LIMIT: NonNegativeInt = Field(
description="Maximum allowed image file size for attachments in megabytes",
default=2,
)

ATTACHMENT_IMAGE_DOWNLOAD_TIMEOUT: NonNegativeInt = Field(
description="Timeout for downloading image attachments in seconds",
default=60,
)

inner_UPLOAD_FILE_EXTENSION_BLACKLIST: str = Field(
description=(
"Comma-separated list of file extensions that are blocked from upload. "
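For context, the four new limits behave like the other FileUploadConfig settings and resolve from environment variables of the same name. The sketch below is a standalone illustration of that pydantic-settings pattern, not Dify code; the field names and defaults are taken from the diff above, everything else is hypothetical.

# Illustrative only: pydantic-settings reads each field from an env var of the
# same name and falls back to the default declared here.
from pydantic import Field, NonNegativeInt, PositiveInt
from pydantic_settings import BaseSettings

class AttachmentLimitsSketch(BaseSettings):
    IMAGE_FILE_BATCH_LIMIT: PositiveInt = Field(default=10)           # files per image batch upload
    SINGLE_CHUNK_ATTACHMENT_LIMIT: PositiveInt = Field(default=10)    # attachments per chunk
    ATTACHMENT_IMAGE_FILE_SIZE_LIMIT: NonNegativeInt = Field(default=2)    # megabytes
    ATTACHMENT_IMAGE_DOWNLOAD_TIMEOUT: NonNegativeInt = Field(default=60)  # seconds

# e.g. exporting ATTACHMENT_IMAGE_FILE_SIZE_LIMIT=5 makes
# AttachmentLimitsSketch().ATTACHMENT_IMAGE_FILE_SIZE_LIMIT evaluate to 5.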
6 changes: 3 additions & 3 deletions api/controllers/console/datasets/datasets.py
@@ -494,17 +494,17 @@ def patch(self, dataset_id):
args = parser.parse_args()
data = request.get_json()
current_user, current_tenant_id = current_account_with_tenant()

args["is_multimodal"] = False
# check embedding model setting
if (
data.get("indexing_technique") == "high_quality"
and data.get("embedding_model_provider") is not None
and data.get("embedding_model") is not None
):
DatasetService.check_embedding_model_setting(
is_multimodal = DatasetService.check_is_multimodal_model(
dataset.tenant_id, data.get("embedding_model_provider"), data.get("embedding_model")
)

args["is_multimodal"] = is_multimodal
# The role of the current user in the ta table must be admin, owner, editor, or dataset_operator
DatasetPermissionService.check_permission(
current_user, dataset, data.get("permission"), data.get("partial_member_list")
4 changes: 4 additions & 0 deletions api/controllers/console/datasets/datasets_document.py
@@ -447,6 +447,10 @@ def post(self):
model_type=ModelType.TEXT_EMBEDDING,
model=args["embedding_model"],
)
is_multimodal = DatasetService.check_is_multimodal_model(
current_tenant_id, args["embedding_model_provider"], args["embedding_model"]
)
knowledge_config.is_multimodal = is_multimodal
except InvokeAuthorizationError:
raise ProviderNotInitializeError(
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
2 changes: 2 additions & 0 deletions api/controllers/console/datasets/datasets_segments.py
@@ -251,6 +251,7 @@ def post(self, dataset_id, document_id):
.add_argument("content", type=str, required=True, nullable=False, location="json")
.add_argument("answer", type=str, required=False, nullable=True, location="json")
.add_argument("keywords", type=list, required=False, nullable=True, location="json")
.add_argument("attachment_ids", type=list, required=False, nullable=True, location="json")
)
args = parser.parse_args()
SegmentService.segment_create_args_validate(args, document)
@@ -321,6 +322,7 @@ def patch(self, dataset_id, document_id, segment_id):
.add_argument(
"regenerate_child_chunks", type=bool, required=False, nullable=True, default=False, location="json"
)
.add_argument("attachment_ids", type=list, required=False, nullable=True, location="json")
)
args = parser.parse_args()
SegmentService.segment_create_args_validate(args, document)
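The same attachment_ids list is accepted on both the segment create (POST) and update (PATCH) handlers shown above. Below is a hedged sketch of a create request whose payload keys mirror the reqparse arguments; the endpoint path, IDs, and token are placeholders, not the confirmed route.

# Hypothetical request against the console segments API (placeholder path/IDs/token).
import requests

payload = {
    "content": "Chunk text describing the attached product photo.",
    "keywords": ["product", "photo"],
    "attachment_ids": ["<uploaded-image-file-id>"],  # previously uploaded image file IDs
}
requests.post(
    "https://<console-host>/console/api/datasets/<dataset_id>/documents/<document_id>/segments",
    json=payload,
    headers={"Authorization": "Bearer <console-access-token>"},
    timeout=30,
)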
4 changes: 3 additions & 1 deletion api/controllers/console/datasets/hit_testing_base.py
@@ -50,7 +50,8 @@ def hit_testing_args_check(args):
def parse_args():
parser = (
reqparse.RequestParser()
.add_argument("query", type=str, location="json")
.add_argument("query", type=str, required=False, location="json")
.add_argument("attachment_ids", type=list, required=False, location="json")
.add_argument("retrieval_model", type=dict, required=False, location="json")
.add_argument("external_retrieval_model", type=dict, required=False, location="json")
)
@@ -66,6 +67,7 @@ def perform_hit_testing(dataset, args):
account=current_user,
retrieval_model=args["retrieval_model"],
external_retrieval_model=args["external_retrieval_model"],
attachment_ids=args["attachment_ids"],
limit=10,
)
return {"query": response["query"], "records": marshal(response["records"], hit_testing_record_fields)}
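Because query is now optional and attachment_ids is parsed alongside it, a retrieval test can be driven by an image as well as by text. A minimal sketch of such a request body follows; the retrieval_model contents and the file ID are illustrative assumptions, not values taken from this PR.

# Hypothetical request body for the dataset hit-testing endpoint; with query optional,
# the attachment can carry the search input on its own.
hit_testing_body = {
    "query": "",                                     # may be left empty for image-only testing
    "attachment_ids": ["<uploaded-image-file-id>"],  # image to embed and search with
    "retrieval_model": {"search_method": "semantic_search", "top_k": 10},  # illustrative settings
}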
2 changes: 2 additions & 0 deletions api/controllers/console/files.py
@@ -45,6 +45,8 @@ def get(self):
"video_file_size_limit": dify_config.UPLOAD_VIDEO_FILE_SIZE_LIMIT,
"audio_file_size_limit": dify_config.UPLOAD_AUDIO_FILE_SIZE_LIMIT,
"workflow_file_upload_limit": dify_config.WORKFLOW_FILE_UPLOAD_LIMIT,
"image_file_batch_limit": dify_config.IMAGE_FILE_BATCH_LIMIT,
"single_chunk_attachment_limit": dify_config.SINGLE_CHUNK_ATTACHMENT_LIMIT,
}, 200

@setup_required
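Clients that read this upload-config endpoint now receive two extra keys. A hedged client-side sketch follows; the URL is a placeholder, while the key names and defaults come from the dict above.

# Illustrative client check against the new limits returned by the endpoint above.
import requests

upload_config = requests.get(
    "https://<console-host>/console/api/files/upload",
    headers={"Authorization": "Bearer <console-access-token>"},
    timeout=30,
).json()

selected_images = ["photo-1.png", "photo-2.png"]  # files the client wants to send in one batch
batch_limit = upload_config.get("image_file_batch_limit", 10)
if len(selected_images) > batch_limit:
    raise ValueError(f"at most {batch_limit} images are allowed per batch upload")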
2 changes: 2 additions & 0 deletions api/core/app/apps/base_app_runner.py
@@ -83,6 +83,7 @@ def organize_prompt_messages(
context: str | None = None,
memory: TokenBufferMemory | None = None,
image_detail_config: ImagePromptMessageContent.DETAIL | None = None,
context_files: list["File"] | None = None,
) -> tuple[list[PromptMessage], list[str] | None]:
"""
Organize prompt messages
@@ -111,6 +112,7 @@
memory=memory,
model_config=model_config,
image_detail_config=image_detail_config,
context_files=context_files,
)
else:
memory_config = MemoryConfig(window=MemoryConfig.WindowConfig(enabled=False))
7 changes: 6 additions & 1 deletion api/core/app/apps/chat/app_runner.py
@@ -146,6 +146,7 @@ def run(

# get context from datasets
context = None
context_files = []
if app_config.dataset and app_config.dataset.dataset_ids:
hit_callback = DatasetIndexToolCallbackHandler(
queue_manager,
@@ -156,7 +157,7 @@
)

dataset_retrieval = DatasetRetrieval(application_generate_entity)
context = dataset_retrieval.retrieve(
context, context_files = dataset_retrieval.retrieve(
app_id=app_record.id,
user_id=application_generate_entity.user_id,
tenant_id=app_record.tenant_id,
@@ -171,6 +172,9 @@
memory=memory,
message_id=message.id,
inputs=inputs,
vision_enabled=application_generate_entity.app_config.app_model_config_dict.get("file_upload", {}).get(
"enabled", False
),
)

# reorganize all inputs and template to prompt messages
@@ -186,6 +190,7 @@
context=context,
memory=memory,
image_detail_config=image_detail_config,
context_files=context_files,
)

# check hosting moderation
7 changes: 6 additions & 1 deletion api/core/app/apps/completion/app_runner.py
@@ -102,6 +102,7 @@ def run(

# get context from datasets
context = None
context_files = []
if app_config.dataset and app_config.dataset.dataset_ids:
hit_callback = DatasetIndexToolCallbackHandler(
queue_manager,
@@ -116,7 +117,7 @@
query = inputs.get(dataset_config.retrieve_config.query_variable, "")

dataset_retrieval = DatasetRetrieval(application_generate_entity)
context = dataset_retrieval.retrieve(
context, context_files = dataset_retrieval.retrieve(
app_id=app_record.id,
user_id=application_generate_entity.user_id,
tenant_id=app_record.tenant_id,
@@ -130,6 +131,9 @@
hit_callback=hit_callback,
message_id=message.id,
inputs=inputs,
vision_enabled=application_generate_entity.app_config.app_model_config_dict.get("file_upload", {}).get(
"enabled", False
),
)

# reorganize all inputs and template to prompt messages
@@ -144,6 +148,7 @@
query=query,
context=context,
image_detail_config=image_detail_config,
context_files=context_files,
)

# check hosting moderation
4 changes: 2 additions & 2 deletions api/core/callback_handler/index_tool_callback_handler.py
@@ -7,7 +7,7 @@
from core.app.entities.app_invoke_entities import InvokeFrom
from core.app.entities.queue_entities import QueueRetrieverResourcesEvent
from core.rag.entities.citation_metadata import RetrievalSourceMetadata
from core.rag.index_processor.constant.index_type import IndexType
from core.rag.index_processor.constant.index_type import IndexStructureType
from core.rag.models.document import Document
from extensions.ext_database import db
from models.dataset import ChildChunk, DatasetQuery, DocumentSegment
@@ -59,7 +59,7 @@ def on_tool_end(self, documents: list[Document]):
document_id,
)
continue
if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX:
if dataset_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX:
child_chunk_stmt = select(ChildChunk).where(
ChildChunk.index_node_id == document.metadata["doc_id"],
ChildChunk.dataset_id == dataset_document.dataset_id,
64 changes: 52 additions & 12 deletions api/core/indexing_runner.py
@@ -7,7 +7,7 @@
import uuid
from typing import Any

from flask import current_app
from flask import Flask, current_app
from sqlalchemy import select
from sqlalchemy.orm.exc import ObjectDeletedError

@@ -21,7 +21,7 @@
from core.rag.docstore.dataset_docstore import DatasetDocumentStore
from core.rag.extractor.entity.datasource_type import DatasourceType
from core.rag.extractor.entity.extract_setting import ExtractSetting, NotionInfo, WebsiteInfo
from core.rag.index_processor.constant.index_type import IndexType
from core.rag.index_processor.constant.index_type import IndexStructureType
from core.rag.index_processor.index_processor_base import BaseIndexProcessor
from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
from core.rag.models.document import ChildDocument, Document
@@ -36,6 +36,7 @@
from extensions.ext_storage import storage
from libs import helper
from libs.datetime_utils import naive_utc_now
from models import Account
from models.dataset import ChildChunk, Dataset, DatasetProcessRule, DocumentSegment
from models.dataset import Document as DatasetDocument
from models.model import UploadFile
@@ -89,8 +90,17 @@ def run(self, dataset_documents: list[DatasetDocument]):
text_docs = self._extract(index_processor, requeried_document, processing_rule.to_dict())

# transform
current_user = db.session.query(Account).filter_by(id=requeried_document.created_by).first()
if not current_user:
raise ValueError("no current user found")
current_user.set_tenant_id(dataset.tenant_id)
documents = self._transform(
index_processor, dataset, text_docs, requeried_document.doc_language, processing_rule.to_dict()
index_processor,
dataset,
text_docs,
requeried_document.doc_language,
processing_rule.to_dict(),
current_user=current_user,
)
# save segment
self._load_segments(dataset, requeried_document, documents)
@@ -136,7 +146,7 @@ def run_in_splitting_status(self, dataset_document: DatasetDocument):

for document_segment in document_segments:
db.session.delete(document_segment)
if requeried_document.doc_form == IndexType.PARENT_CHILD_INDEX:
if requeried_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX:
# delete child chunks
db.session.query(ChildChunk).where(ChildChunk.segment_id == document_segment.id).delete()
db.session.commit()
@@ -152,8 +162,17 @@ def run_in_splitting_status(self, dataset_document: DatasetDocument):
text_docs = self._extract(index_processor, requeried_document, processing_rule.to_dict())

# transform
current_user = db.session.query(Account).filter_by(id=requeried_document.created_by).first()
if not current_user:
raise ValueError("no current user found")
current_user.set_tenant_id(dataset.tenant_id)
documents = self._transform(
index_processor, dataset, text_docs, requeried_document.doc_language, processing_rule.to_dict()
index_processor,
dataset,
text_docs,
requeried_document.doc_language,
processing_rule.to_dict(),
current_user=current_user,
)
# save segment
self._load_segments(dataset, requeried_document, documents)
@@ -209,7 +228,7 @@ def run_in_indexing_status(self, dataset_document: DatasetDocument):
"dataset_id": document_segment.dataset_id,
},
)
if requeried_document.doc_form == IndexType.PARENT_CHILD_INDEX:
if requeried_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX:
child_chunks = document_segment.get_child_chunks()
if child_chunks:
child_documents = []
@@ -302,6 +321,7 @@ def indexing_estimate(
text_docs = index_processor.extract(extract_setting, process_rule_mode=tmp_processing_rule["mode"])
documents = index_processor.transform(
text_docs,
current_user=None,
embedding_model_instance=embedding_model_instance,
process_rule=processing_rule.to_dict(),
tenant_id=tenant_id,
@@ -551,7 +571,10 @@ def _load(
indexing_start_at = time.perf_counter()
tokens = 0
create_keyword_thread = None
if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX and dataset.indexing_technique == "economy":
if (
dataset_document.doc_form != IndexStructureType.PARENT_CHILD_INDEX
and dataset.indexing_technique == "economy"
):
# create keyword index
create_keyword_thread = threading.Thread(
target=self._process_keyword_index,
@@ -590,7 +613,7 @@ def _load(
for future in futures:
tokens += future.result()
if (
dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX
dataset_document.doc_form != IndexStructureType.PARENT_CHILD_INDEX
and dataset.indexing_technique == "economy"
and create_keyword_thread is not None
):
@@ -635,7 +658,13 @@ def _process_keyword_index(flask_app, dataset_id, document_id, documents):
db.session.commit()

def _process_chunk(
self, flask_app, index_processor, chunk_documents, dataset, dataset_document, embedding_model_instance
self,
flask_app: Flask,
index_processor: BaseIndexProcessor,
chunk_documents: list[Document],
dataset: Dataset,
dataset_document: DatasetDocument,
embedding_model_instance: ModelInstance | None,
):
with flask_app.app_context():
# check document is paused
@@ -646,8 +675,15 @@ def _process_chunk(
page_content_list = [document.page_content for document in chunk_documents]
tokens += sum(embedding_model_instance.get_text_embedding_num_tokens(page_content_list))

multimodal_documents = []
for document in chunk_documents:
if document.attachments and dataset.is_multimodal:
multimodal_documents.extend(document.attachments)

# load index
index_processor.load(dataset, chunk_documents, with_keywords=False)
index_processor.load(
dataset, chunk_documents, multimodal_documents=multimodal_documents, with_keywords=False
)

document_ids = [document.metadata["doc_id"] for document in chunk_documents]
db.session.query(DocumentSegment).where(
@@ -710,6 +746,7 @@ def _transform(
text_docs: list[Document],
doc_language: str,
process_rule: dict,
current_user: Account | None = None,
) -> list[Document]:
# get embedding model instance
embedding_model_instance = None
@@ -729,6 +766,7 @@

documents = index_processor.transform(
text_docs,
current_user,
embedding_model_instance=embedding_model_instance,
process_rule=process_rule,
tenant_id=dataset.tenant_id,
@@ -737,14 +775,16 @@

return documents

def _load_segments(self, dataset, dataset_document, documents):
def _load_segments(self, dataset: Dataset, dataset_document: DatasetDocument, documents: list[Document]):
# save node to document segment
doc_store = DatasetDocumentStore(
dataset=dataset, user_id=dataset_document.created_by, document_id=dataset_document.id
)

# add document segments
doc_store.add_documents(docs=documents, save_child=dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX)
doc_store.add_documents(
docs=documents, save_child=dataset_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX
)

# update document status to indexing
cur_time = naive_utc_now()