From 39f7baec83b1a6c3ef8e26f6bfa674a39a1048ea Mon Sep 17 00:00:00 2001 From: nsapin34 Date: Mon, 1 Dec 2025 14:29:34 +0100 Subject: [PATCH 1/3] change lab 2 Signed-off-by: nsapin34 --- workshops/2025_12_04/docling_lab_2.ipynb | 286 ++++++++++++++++++++--- 1 file changed, 252 insertions(+), 34 deletions(-) diff --git a/workshops/2025_12_04/docling_lab_2.ipynb b/workshops/2025_12_04/docling_lab_2.ipynb index 1ee69ca..6574b06 100644 --- a/workshops/2025_12_04/docling_lab_2.ipynb +++ b/workshops/2025_12_04/docling_lab_2.ipynb @@ -16,22 +16,158 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 2, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", - "I0000 00:00:1764342422.086269 24419707 fork_posix.cc:71] Other threads are currently calling into gRPC, skipping fork() handlers\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[2mAudited \u001b[1m10 packages\u001b[0m \u001b[2min 61ms\u001b[0m\u001b[0m\n" + "Requirement already satisfied: langchain-docling in /usr/local/lib/python3.12/site-packages (2.0.0)\n", + "Requirement already satisfied: langchain-core in /usr/local/lib/python3.12/site-packages (1.1.0)\n", + "Requirement already satisfied: langchain-huggingface in /usr/local/lib/python3.12/site-packages (1.1.0)\n", + "Requirement already satisfied: sentence-transformers in /usr/local/lib/python3.12/site-packages (5.1.2)\n", + "Requirement already satisfied: langchain_milvus in /usr/local/lib/python3.12/site-packages (0.3.0)\n", + "Requirement already satisfied: langchain-text-splitters in /usr/local/lib/python3.12/site-packages (1.0.0)\n", + "Requirement already satisfied: langchain-classic in /usr/local/lib/python3.12/site-packages (1.0.0)\n", + "Requirement already satisfied: langchain-openai in /usr/local/lib/python3.12/site-packages (1.1.0)\n", + "Requirement already satisfied: 
python-dotenv in /usr/local/lib/python3.12/site-packages (1.2.1)\n", + "Requirement already satisfied: pymilvus[milvus_lite] in /usr/local/lib/python3.12/site-packages (2.6.4)\n", + "Requirement already satisfied: docling~=2.26 in /usr/local/lib/python3.12/site-packages (from langchain-docling) (2.63.0)\n", + "Requirement already satisfied: jsonpatch<2.0.0,>=1.33.0 in /usr/local/lib/python3.12/site-packages (from langchain-core) (1.33)\n", + "Requirement already satisfied: langsmith<1.0.0,>=0.3.45 in /usr/local/lib/python3.12/site-packages (from langchain-core) (0.4.49)\n", + "Requirement already satisfied: packaging<26.0.0,>=23.2.0 in /usr/local/lib/python3.12/site-packages (from langchain-core) (25.0)\n", + "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /usr/local/lib/python3.12/site-packages (from langchain-core) (2.12.5)\n", + "Requirement already satisfied: pyyaml<7.0.0,>=5.3.0 in /usr/local/lib/python3.12/site-packages (from langchain-core) (6.0.3)\n", + "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.1.0 in /usr/local/lib/python3.12/site-packages (from langchain-core) (9.1.2)\n", + "Requirement already satisfied: typing-extensions<5.0.0,>=4.7.0 in /usr/local/lib/python3.12/site-packages (from langchain-core) (4.15.0)\n", + "Requirement already satisfied: docling-core<3.0.0,>=2.50.1 in /usr/local/lib/python3.12/site-packages (from docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (2.53.0)\n", + "Requirement already satisfied: docling-parse<5.0.0,>=4.7.0 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (4.7.1)\n", + "Requirement already satisfied: docling-ibm-models<4,>=3.9.1 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (3.10.2)\n", + "Requirement already satisfied: filetype<2.0.0,>=1.2.0 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (1.2.0)\n", + "Requirement already satisfied: 
pypdfium2!=4.30.1,<5.0.0,>=4.30.0 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (4.30.0)\n", + "Requirement already satisfied: pydantic-settings<3.0.0,>=2.3.0 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (2.12.0)\n", + "Requirement already satisfied: huggingface_hub<1,>=0.23 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (0.36.0)\n", + "Requirement already satisfied: requests<3.0.0,>=2.32.2 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (2.32.5)\n", + "Requirement already satisfied: rapidocr<4.0.0,>=3.3 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (3.4.2)\n", + "Requirement already satisfied: certifi>=2024.7.4 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (2025.11.12)\n", + "Requirement already satisfied: rtree<2.0.0,>=1.3.0 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (1.4.1)\n", + "Requirement already satisfied: typer<0.20.0,>=0.12.5 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (0.19.2)\n", + "Requirement already satisfied: python-docx<2.0.0,>=1.1.2 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (1.2.0)\n", + "Requirement already satisfied: python-pptx<2.0.0,>=1.0.2 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (1.0.2)\n", + "Requirement already satisfied: beautifulsoup4<5.0.0,>=4.12.3 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (4.14.2)\n", + "Requirement already satisfied: pandas<3.0.0,>=2.1.4 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (2.2.3)\n", + "Requirement already satisfied: marko<3.0.0,>=2.1.2 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (2.2.1)\n", + "Requirement already 
satisfied: openpyxl<4.0.0,>=3.1.5 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (3.1.5)\n", + "Requirement already satisfied: lxml<7.0.0,>=4.0.0 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (6.0.2)\n", + "Requirement already satisfied: pillow<12.0.0,>=10.0.0 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (11.3.0)\n", + "Requirement already satisfied: tqdm<5.0.0,>=4.65.0 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (4.67.1)\n", + "Requirement already satisfied: pluggy<2.0.0,>=1.0.0 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (1.6.0)\n", + "Requirement already satisfied: pylatexenc<3.0,>=2.10 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (2.10)\n", + "Requirement already satisfied: scipy<2.0.0,>=1.6.0 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (1.16.3)\n", + "Requirement already satisfied: accelerate<2,>=1.0.0 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (1.12.0)\n", + "Requirement already satisfied: polyfactory>=2.22.2 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (3.1.0)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.12/site-packages (from accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (2.2.6)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.12/site-packages (from accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (7.1.3)\n", + "Requirement already satisfied: torch>=2.0.0 in /usr/local/lib/python3.12/site-packages (from accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (2.9.1)\n", + "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.12/site-packages (from accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (0.7.0)\n", + "Requirement 
already satisfied: soupsieve>1.2 in /usr/local/lib/python3.12/site-packages (from beautifulsoup4<5.0.0,>=4.12.3->docling~=2.26->langchain-docling) (2.8)\n", + "Requirement already satisfied: jsonschema<5.0.0,>=4.16.0 in /usr/local/lib/python3.12/site-packages (from docling-core<3.0.0,>=2.50.1->docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (4.25.1)\n", + "Requirement already satisfied: jsonref<2.0.0,>=1.1.0 in /usr/local/lib/python3.12/site-packages (from docling-core<3.0.0,>=2.50.1->docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (1.1.0)\n", + "Requirement already satisfied: tabulate<0.10.0,>=0.9.0 in /usr/local/lib/python3.12/site-packages (from docling-core<3.0.0,>=2.50.1->docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (0.9.0)\n", + "Requirement already satisfied: latex2mathml<4.0.0,>=3.77.0 in /usr/local/lib/python3.12/site-packages (from docling-core<3.0.0,>=2.50.1->docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (3.78.1)\n", + "Requirement already satisfied: semchunk<3.0.0,>=2.2.0 in /usr/local/lib/python3.12/site-packages (from docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (2.2.2)\n", + "Requirement already satisfied: tree-sitter<1.0.0,>=0.23.2 in /usr/local/lib/python3.12/site-packages (from docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (0.25.2)\n", + "Requirement already satisfied: tree-sitter-python<1.0.0,>=0.23.6 in /usr/local/lib/python3.12/site-packages (from docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (0.25.0)\n", + "Requirement already satisfied: tree-sitter-c<1.0.0,>=0.23.4 in /usr/local/lib/python3.12/site-packages (from docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (0.24.1)\n", + "Requirement already satisfied: tree-sitter-java<1.0.0,>=0.23.5 in /usr/local/lib/python3.12/site-packages (from 
docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (0.23.5)\n", + "Requirement already satisfied: tree-sitter-javascript<1.0.0,>=0.23.1 in /usr/local/lib/python3.12/site-packages (from docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (0.25.0)\n", + "Requirement already satisfied: tree-sitter-typescript<1.0.0,>=0.23.2 in /usr/local/lib/python3.12/site-packages (from docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (0.23.2)\n", + "Requirement already satisfied: transformers<5.0.0,>=4.34.0 in /usr/local/lib/python3.12/site-packages (from docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (4.57.3)\n", + "Requirement already satisfied: torchvision<1,>=0 in /usr/local/lib/python3.12/site-packages (from docling-ibm-models<4,>=3.9.1->docling~=2.26->langchain-docling) (0.24.1)\n", + "Requirement already satisfied: jsonlines<5.0.0,>=3.1.0 in /usr/local/lib/python3.12/site-packages (from docling-ibm-models<4,>=3.9.1->docling~=2.26->langchain-docling) (4.0.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.12/site-packages (from huggingface_hub<1,>=0.23->docling~=2.26->langchain-docling) (3.20.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.12/site-packages (from huggingface_hub<1,>=0.23->docling~=2.26->langchain-docling) (2025.10.0)\n", + "Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/site-packages (from huggingface_hub<1,>=0.23->docling~=2.26->langchain-docling) (1.2.0)\n", + "Requirement already satisfied: attrs>=19.2.0 in /usr/local/lib/python3.12/site-packages (from jsonlines<5.0.0,>=3.1.0->docling-ibm-models<4,>=3.9.1->docling~=2.26->langchain-docling) (25.4.0)\n", + "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.12/site-packages (from jsonpatch<2.0.0,>=1.33.0->langchain-core) (3.0.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 
in /usr/local/lib/python3.12/site-packages (from jsonschema<5.0.0,>=4.16.0->docling-core<3.0.0,>=2.50.1->docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (2025.9.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.12/site-packages (from jsonschema<5.0.0,>=4.16.0->docling-core<3.0.0,>=2.50.1->docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (0.37.0)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.12/site-packages (from jsonschema<5.0.0,>=4.16.0->docling-core<3.0.0,>=2.50.1->docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (0.29.0)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core) (0.28.1)\n", + "Requirement already satisfied: orjson>=3.9.14 in /usr/local/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core) (3.11.4)\n", + "Requirement already satisfied: requests-toolbelt>=1.0.0 in /usr/local/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core) (1.0.0)\n", + "Requirement already satisfied: zstandard>=0.23.0 in /usr/local/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core) (0.25.0)\n", + "Requirement already satisfied: anyio in /usr/local/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core) (4.11.0)\n", + "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core) (1.0.9)\n", + "Requirement already satisfied: idna in /usr/local/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core) (3.11)\n", + "Requirement already satisfied: h11>=0.16 in /usr/local/lib/python3.12/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core) (0.16.0)\n", + "Requirement already 
satisfied: et-xmlfile in /usr/local/lib/python3.12/site-packages (from openpyxl<4.0.0,>=3.1.5->docling~=2.26->langchain-docling) (2.0.0)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.12/site-packages (from pandas<3.0.0,>=2.1.4->docling~=2.26->langchain-docling) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/site-packages (from pandas<3.0.0,>=2.1.4->docling~=2.26->langchain-docling) (2025.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/site-packages (from pandas<3.0.0,>=2.1.4->docling~=2.26->langchain-docling) (2025.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.41.5 in /usr/local/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core) (2.41.5)\n", + "Requirement already satisfied: typing-inspection>=0.4.2 in /usr/local/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core) (0.4.2)\n", + "Requirement already satisfied: XlsxWriter>=0.5.7 in /usr/local/lib/python3.12/site-packages (from python-pptx<2.0.0,>=1.0.2->docling~=2.26->langchain-docling) (3.2.9)\n", + "Requirement already satisfied: pyclipper>=1.2.0 in /usr/local/lib/python3.12/site-packages (from rapidocr<4.0.0,>=3.3->docling~=2.26->langchain-docling) (1.3.0.post6)\n", + "Requirement already satisfied: opencv-python>=4.5.1.48 in /usr/local/lib/python3.12/site-packages (from rapidocr<4.0.0,>=3.3->docling~=2.26->langchain-docling) (4.12.0.88)\n", + "Requirement already satisfied: six>=1.15.0 in /usr/local/lib/python3.12/site-packages (from rapidocr<4.0.0,>=3.3->docling~=2.26->langchain-docling) (1.17.0)\n", + "Requirement already satisfied: Shapely!=2.0.4,>=1.7.1 in /usr/local/lib/python3.12/site-packages (from rapidocr<4.0.0,>=3.3->docling~=2.26->langchain-docling) (2.1.2)\n", + 
"Requirement already satisfied: omegaconf in /usr/local/lib/python3.12/site-packages (from rapidocr<4.0.0,>=3.3->docling~=2.26->langchain-docling) (2.3.0)\n", + "Requirement already satisfied: colorlog in /usr/local/lib/python3.12/site-packages (from rapidocr<4.0.0,>=3.3->docling~=2.26->langchain-docling) (6.10.1)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/site-packages (from requests<3.0.0,>=2.32.2->docling~=2.26->langchain-docling) (3.4.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/site-packages (from requests<3.0.0,>=2.32.2->docling~=2.26->langchain-docling) (2.5.0)\n", + "Requirement already satisfied: mpire[dill] in /usr/local/lib/python3.12/site-packages (from semchunk<3.0.0,>=2.2.0->docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (2.10.2)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (80.9.0)\n", + "Requirement already satisfied: sympy>=1.13.3 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (1.14.0)\n", + "Requirement already satisfied: networkx>=2.5.1 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (3.6)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (3.1.6)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.8.93 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (12.8.93)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.8.90 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (12.8.90)\n", + "Requirement 
already satisfied: nvidia-cuda-cupti-cu12==12.8.90 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (12.8.90)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==9.10.2.21 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (9.10.2.21)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.8.4.1 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (12.8.4.1)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.3.3.83 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (11.3.3.83)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.9.90 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (10.3.9.90)\n", + "Requirement already satisfied: nvidia-cusolver-cu12==11.7.3.90 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (11.7.3.90)\n", + "Requirement already satisfied: nvidia-cusparse-cu12==12.5.8.93 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (12.5.8.93)\n", + "Requirement already satisfied: nvidia-cusparselt-cu12==0.7.1 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (0.7.1)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.27.5 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (2.27.5)\n", + "Requirement already satisfied: nvidia-nvshmem-cu12==3.3.20 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (3.3.20)\n", + "Requirement already satisfied: 
nvidia-nvtx-cu12==12.8.90 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (12.8.90)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12==12.8.93 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (12.8.93)\n", + "Requirement already satisfied: nvidia-cufile-cu12==1.13.1.3 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (1.13.1.3)\n", + "Requirement already satisfied: triton==3.5.1 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (3.5.1)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.12/site-packages (from transformers<5.0.0,>=4.34.0->docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (2025.11.3)\n", + "Requirement already satisfied: tokenizers<=0.23.0,>=0.22.0 in /usr/local/lib/python3.12/site-packages (from transformers<5.0.0,>=4.34.0->docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (0.22.1)\n", + "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.12/site-packages (from typer<0.20.0,>=0.12.5->docling~=2.26->langchain-docling) (8.3.1)\n", + "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.12/site-packages (from typer<0.20.0,>=0.12.5->docling~=2.26->langchain-docling) (1.5.4)\n", + "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.12/site-packages (from typer<0.20.0,>=0.12.5->docling~=2.26->langchain-docling) (14.2.0)\n", + "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.12/site-packages (from sentence-transformers) (1.7.2)\n", + "Requirement already satisfied: grpcio!=1.68.0,!=1.68.1,!=1.69.0,!=1.70.0,!=1.70.1,!=1.71.0,!=1.72.1,!=1.73.0,>=1.66.2 in /usr/local/lib/python3.12/site-packages (from 
pymilvus[milvus_lite]) (1.76.0)\n", + "Requirement already satisfied: protobuf>=5.27.2 in /usr/local/lib/python3.12/site-packages (from pymilvus[milvus_lite]) (6.33.1)\n", + "Requirement already satisfied: milvus-lite>=2.4.0 in /usr/local/lib/python3.12/site-packages (from pymilvus[milvus_lite]) (2.5.1)\n", + "Requirement already satisfied: sqlalchemy<3.0.0,>=1.4.0 in /usr/local/lib/python3.12/site-packages (from langchain-classic) (2.0.44)\n", + "Requirement already satisfied: greenlet>=1 in /usr/local/lib/python3.12/site-packages (from sqlalchemy<3.0.0,>=1.4.0->langchain-classic) (3.2.4)\n", + "Requirement already satisfied: openai<3.0.0,>=1.109.1 in /usr/local/lib/python3.12/site-packages (from langchain-openai) (2.8.1)\n", + "Requirement already satisfied: tiktoken<1.0.0,>=0.7.0 in /usr/local/lib/python3.12/site-packages (from langchain-openai) (0.12.0)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.12/site-packages (from openai<3.0.0,>=1.109.1->langchain-openai) (1.9.0)\n", + "Requirement already satisfied: jiter<1,>=0.10.0 in /usr/local/lib/python3.12/site-packages (from openai<3.0.0,>=1.109.1->langchain-openai) (0.12.0)\n", + "Requirement already satisfied: sniffio in /usr/local/lib/python3.12/site-packages (from openai<3.0.0,>=1.109.1->langchain-openai) (1.3.1)\n", + "Requirement already satisfied: faker>=5.0.0 in /usr/local/lib/python3.12/site-packages (from polyfactory>=2.22.2->docling~=2.26->langchain-docling) (38.2.0)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.12/site-packages (from rich>=10.11.0->typer<0.20.0,>=0.12.5->docling~=2.26->langchain-docling) (4.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.12/site-packages (from rich>=10.11.0->typer<0.20.0,>=0.12.5->docling~=2.26->langchain-docling) (2.19.2)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.12/site-packages (from 
markdown-it-py>=2.2.0->rich>=10.11.0->typer<0.20.0,>=0.12.5->docling~=2.26->langchain-docling) (0.1.2)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.12/site-packages (from sympy>=1.13.3->torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (1.3.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.12/site-packages (from jinja2->torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (3.0.3)\n", + "Requirement already satisfied: multiprocess>=0.70.15 in /usr/local/lib/python3.12/site-packages (from mpire[dill]->semchunk<3.0.0,>=2.2.0->docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (0.70.18)\n", + "Requirement already satisfied: dill>=0.4.0 in /usr/local/lib/python3.12/site-packages (from multiprocess>=0.70.15->mpire[dill]->semchunk<3.0.0,>=2.2.0->docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (0.4.0)\n", + "Requirement already satisfied: antlr4-python3-runtime==4.9.* in /usr/local/lib/python3.12/site-packages (from omegaconf->rapidocr<4.0.0,>=3.3->docling~=2.26->langchain-docling) (4.9.3)\n", + "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.12/site-packages (from scikit-learn->sentence-transformers) (1.5.2)\n", + "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.12/site-packages (from scikit-learn->sentence-transformers) (3.6.0)\n", + "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. 
Use the --root-user-action option if you know what you are doing and want to suppress this warning.\u001b[0m\u001b[33m\n", + "\u001b[0m" ] } ], @@ -41,14 +177,14 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/Users/dol/codes/docling-workshops/workshops/2025_12_04/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + "/usr/local/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } @@ -56,16 +192,31 @@ "source": [ "import logging\n", "import os\n", + "import requests\n", "\n", "from dotenv import load_dotenv\n", "from langchain_core.prompts import PromptTemplate\n", "\n", "load_dotenv()\n", + "api_key = os.environ.get(\"WX_API_KEY\")\n", + "project_id = os.environ.get(\"WX_PROJECT_ID\")\n", + "\n", "\n", "logging.basicConfig(level=logging.ERROR)\n", "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# to check that your .env file has been read correctly\n", + "print(api_key)\n", + "print(project_id)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -75,7 +226,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -121,21 +272,86 @@ "## RAG pipeline" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### getting a token to access watsonx.ai models" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "from langchain_ibm import ChatWatsonx\n", + "\n", + 
"def _get_iam_access_token(api_key: str) -> str:\n", + " res = requests.post(\n", + " url=\"https://iam.cloud.ibm.com/identity/token\",\n", + " headers={\n", + " \"Content-Type\": \"application/x-www-form-urlencoded\",\n", + " },\n", + " data=f\"grant_type=urn:ibm:params:oauth:grant-type:apikey&apikey={api_key}\",\n", + " )\n", + " res.raise_for_status()\n", + " api_out = res.json()\n", + " #print(f\"{api_out=}\")\n", + " return api_out[\"access_token\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### setting the parameters to access the wx.ai model" + ] + }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], + "source": [ + "model_id = \"ibm/granite-4-h-small\"\n", + "base_url = \"https://us-south.ml.cloud.ibm.com\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### building the RAG pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], "source": [ "from langchain_classic.chains import create_retrieval_chain\n", "from langchain_classic.chains.combine_documents import create_stuff_documents_chain\n", - "from langchain_openai import ChatOpenAI\n", + "from langchain_ibm import ChatWatsonx\n", "\n", "def clip_text(text, limit=100):\n", " return f\"{text[:limit]}...\" if len(text) > limit else text\n", "\n", "def do_rag(*, retriever, question, lm_model_id, lm_prompt, lm_base_url=\"http://localhost:1234/v1\", lm_api_key=\"none\"):\n", - " llm = ChatOpenAI(model=lm_model_id, base_url=lm_base_url, api_key=lm_api_key)\n", + " generation_params = {\n", + " \"temperature\": 0.7, # 0.0 (deterministic) to 1.0 (creative)\n", + " \"max_tokens\": 1000, # Maximum output length\n", + " \"top_p\": 0.9, # Nucleus sampling threshold\n", + " }\n", + " llm = ChatWatsonx(\n", + " model_id=model_id,\n", + " url=base_url,\n", + " project_id=project_id,\n", + " apikey=api_key,\n", + " params=generation_params # Pass the structured params\n", + " ) \n", " 
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=lm_prompt)\n", " rag_chain = create_retrieval_chain(retriever, question_answer_chain)\n", " resp_dict = rag_chain.invoke({\"input\": question})\n", @@ -166,20 +382,29 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Running an end-to-end example:\n" + "Running an end-to-end example in English:\n" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ + "\u001b[32m[INFO] 2025-11-28 17:08:18,376 [RapidOCR] base.py:22: Using engine_name: torch\u001b[0m\n", + "\u001b[32m[INFO] 2025-11-28 17:08:18,435 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_det_infer.pth\u001b[0m\n", + "\u001b[32m[INFO] 2025-11-28 17:08:18,436 [RapidOCR] torch.py:54: Using /usr/local/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_det_infer.pth\u001b[0m\n", + "\u001b[32m[INFO] 2025-11-28 17:08:18,696 [RapidOCR] base.py:22: Using engine_name: torch\u001b[0m\n", + "\u001b[32m[INFO] 2025-11-28 17:08:18,704 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth\u001b[0m\n", + "\u001b[32m[INFO] 2025-11-28 17:08:18,705 [RapidOCR] torch.py:54: Using /usr/local/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth\u001b[0m\n", + "\u001b[32m[INFO] 2025-11-28 17:08:18,806 [RapidOCR] base.py:22: Using engine_name: torch\u001b[0m\n", + "\u001b[32m[INFO] 2025-11-28 17:08:18,873 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth\u001b[0m\n", + "\u001b[32m[INFO] 2025-11-28 17:08:18,874 [RapidOCR] torch.py:54: Using /usr/local/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth\u001b[0m\n", "Token indices sequence length 
is longer than the specified maximum sequence length for this model (619 > 512). Running this sequence through the model will result in indexing errors\n", - "/Users/dol/codes/docling-workshops/workshops/2025_12_04/.venv/lib/python3.12/site-packages/milvus_lite/__init__.py:15: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n", + "/usr/local/lib/python3.12/site-packages/milvus_lite/__init__.py:15: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n", " from pkg_resources import DistributionNotFound, get_distribution\n" ] } @@ -202,7 +427,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -213,10 +438,10 @@ "Briefly name the main AI models used in Docling.\n", "\n", "Answer:\n", - "The primary AI models integrated into **Docling** are:\n", + "Based on the provided context, the two main AI models used in Docling are:\n", "\n", - "1. **Layout Analysis Model** – an accurate object‑detector for identifying page elements such as headings, paragraphs, images, etc. \n", - "2. **TableFormer** – a state‑of‑the‑art table structure recognition model that extracts tables and their internal layout from documents.\n" + "1. Layout analysis model - an accurate object-detector for page elements\n", + "2. 
TableFormer - a state-of-the-art table structure recognition model\n" ] } ], @@ -224,14 +449,14 @@ "rag_result = do_rag(\n", " retriever=retriever,\n", " question=\"Briefly name the main AI models used in Docling.\",\n", - " lm_model_id=\"openai/gpt-oss-20b\",\n", + " lm_model_id=\"ibm/granite-4-h-small\",\n", " lm_prompt=PromptTemplate.from_template(\"Context information is below.\\n---------------------\\n{context}\\n---------------------\\nGiven the context information and not prior knowledge, answer the query.\\nQuery: {input}\\nAnswer:\\n\"),\n", ")" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -259,18 +484,11 @@ "source": [ "print_sources(rag_result)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "2025_12_04", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -284,9 +502,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.9" + "version": "3.12.10" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } From fd3d119c7df45a5820c575d2acb4d3b848e1795d Mon Sep 17 00:00:00 2001 From: Michele Dolfi Date: Tue, 2 Dec 2025 13:19:34 +0100 Subject: [PATCH 2/3] cleanup and add options for running the lab Signed-off-by: Michele Dolfi --- workshops/2025_12_04/docling_lab_2.ipynb | 276 +++++------------------ 1 file changed, 56 insertions(+), 220 deletions(-) diff --git a/workshops/2025_12_04/docling_lab_2.ipynb b/workshops/2025_12_04/docling_lab_2.ipynb index 6574b06..5bb8f08 100644 --- a/workshops/2025_12_04/docling_lab_2.ipynb +++ b/workshops/2025_12_04/docling_lab_2.ipynb @@ -16,175 +16,31 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: 
langchain-docling in /usr/local/lib/python3.12/site-packages (2.0.0)\n", - "Requirement already satisfied: langchain-core in /usr/local/lib/python3.12/site-packages (1.1.0)\n", - "Requirement already satisfied: langchain-huggingface in /usr/local/lib/python3.12/site-packages (1.1.0)\n", - "Requirement already satisfied: sentence-transformers in /usr/local/lib/python3.12/site-packages (5.1.2)\n", - "Requirement already satisfied: langchain_milvus in /usr/local/lib/python3.12/site-packages (0.3.0)\n", - "Requirement already satisfied: langchain-text-splitters in /usr/local/lib/python3.12/site-packages (1.0.0)\n", - "Requirement already satisfied: langchain-classic in /usr/local/lib/python3.12/site-packages (1.0.0)\n", - "Requirement already satisfied: langchain-openai in /usr/local/lib/python3.12/site-packages (1.1.0)\n", - "Requirement already satisfied: python-dotenv in /usr/local/lib/python3.12/site-packages (1.2.1)\n", - "Requirement already satisfied: pymilvus[milvus_lite] in /usr/local/lib/python3.12/site-packages (2.6.4)\n", - "Requirement already satisfied: docling~=2.26 in /usr/local/lib/python3.12/site-packages (from langchain-docling) (2.63.0)\n", - "Requirement already satisfied: jsonpatch<2.0.0,>=1.33.0 in /usr/local/lib/python3.12/site-packages (from langchain-core) (1.33)\n", - "Requirement already satisfied: langsmith<1.0.0,>=0.3.45 in /usr/local/lib/python3.12/site-packages (from langchain-core) (0.4.49)\n", - "Requirement already satisfied: packaging<26.0.0,>=23.2.0 in /usr/local/lib/python3.12/site-packages (from langchain-core) (25.0)\n", - "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /usr/local/lib/python3.12/site-packages (from langchain-core) (2.12.5)\n", - "Requirement already satisfied: pyyaml<7.0.0,>=5.3.0 in /usr/local/lib/python3.12/site-packages (from langchain-core) (6.0.3)\n", - "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.1.0 in /usr/local/lib/python3.12/site-packages (from langchain-core) (9.1.2)\n", - 
"Requirement already satisfied: typing-extensions<5.0.0,>=4.7.0 in /usr/local/lib/python3.12/site-packages (from langchain-core) (4.15.0)\n", - "Requirement already satisfied: docling-core<3.0.0,>=2.50.1 in /usr/local/lib/python3.12/site-packages (from docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (2.53.0)\n", - "Requirement already satisfied: docling-parse<5.0.0,>=4.7.0 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (4.7.1)\n", - "Requirement already satisfied: docling-ibm-models<4,>=3.9.1 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (3.10.2)\n", - "Requirement already satisfied: filetype<2.0.0,>=1.2.0 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (1.2.0)\n", - "Requirement already satisfied: pypdfium2!=4.30.1,<5.0.0,>=4.30.0 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (4.30.0)\n", - "Requirement already satisfied: pydantic-settings<3.0.0,>=2.3.0 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (2.12.0)\n", - "Requirement already satisfied: huggingface_hub<1,>=0.23 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (0.36.0)\n", - "Requirement already satisfied: requests<3.0.0,>=2.32.2 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (2.32.5)\n", - "Requirement already satisfied: rapidocr<4.0.0,>=3.3 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (3.4.2)\n", - "Requirement already satisfied: certifi>=2024.7.4 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (2025.11.12)\n", - "Requirement already satisfied: rtree<2.0.0,>=1.3.0 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (1.4.1)\n", - "Requirement already satisfied: typer<0.20.0,>=0.12.5 in /usr/local/lib/python3.12/site-packages (from 
docling~=2.26->langchain-docling) (0.19.2)\n", - "Requirement already satisfied: python-docx<2.0.0,>=1.1.2 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (1.2.0)\n", - "Requirement already satisfied: python-pptx<2.0.0,>=1.0.2 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (1.0.2)\n", - "Requirement already satisfied: beautifulsoup4<5.0.0,>=4.12.3 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (4.14.2)\n", - "Requirement already satisfied: pandas<3.0.0,>=2.1.4 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (2.2.3)\n", - "Requirement already satisfied: marko<3.0.0,>=2.1.2 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (2.2.1)\n", - "Requirement already satisfied: openpyxl<4.0.0,>=3.1.5 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (3.1.5)\n", - "Requirement already satisfied: lxml<7.0.0,>=4.0.0 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (6.0.2)\n", - "Requirement already satisfied: pillow<12.0.0,>=10.0.0 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (11.3.0)\n", - "Requirement already satisfied: tqdm<5.0.0,>=4.65.0 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (4.67.1)\n", - "Requirement already satisfied: pluggy<2.0.0,>=1.0.0 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (1.6.0)\n", - "Requirement already satisfied: pylatexenc<3.0,>=2.10 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (2.10)\n", - "Requirement already satisfied: scipy<2.0.0,>=1.6.0 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (1.16.3)\n", - "Requirement already satisfied: accelerate<2,>=1.0.0 in /usr/local/lib/python3.12/site-packages (from 
docling~=2.26->langchain-docling) (1.12.0)\n", - "Requirement already satisfied: polyfactory>=2.22.2 in /usr/local/lib/python3.12/site-packages (from docling~=2.26->langchain-docling) (3.1.0)\n", - "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.12/site-packages (from accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (2.2.6)\n", - "Requirement already satisfied: psutil in /usr/local/lib/python3.12/site-packages (from accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (7.1.3)\n", - "Requirement already satisfied: torch>=2.0.0 in /usr/local/lib/python3.12/site-packages (from accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (2.9.1)\n", - "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.12/site-packages (from accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (0.7.0)\n", - "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.12/site-packages (from beautifulsoup4<5.0.0,>=4.12.3->docling~=2.26->langchain-docling) (2.8)\n", - "Requirement already satisfied: jsonschema<5.0.0,>=4.16.0 in /usr/local/lib/python3.12/site-packages (from docling-core<3.0.0,>=2.50.1->docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (4.25.1)\n", - "Requirement already satisfied: jsonref<2.0.0,>=1.1.0 in /usr/local/lib/python3.12/site-packages (from docling-core<3.0.0,>=2.50.1->docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (1.1.0)\n", - "Requirement already satisfied: tabulate<0.10.0,>=0.9.0 in /usr/local/lib/python3.12/site-packages (from docling-core<3.0.0,>=2.50.1->docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (0.9.0)\n", - "Requirement already satisfied: latex2mathml<4.0.0,>=3.77.0 in /usr/local/lib/python3.12/site-packages (from docling-core<3.0.0,>=2.50.1->docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (3.78.1)\n", - "Requirement already satisfied: semchunk<3.0.0,>=2.2.0 in 
/usr/local/lib/python3.12/site-packages (from docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (2.2.2)\n", - "Requirement already satisfied: tree-sitter<1.0.0,>=0.23.2 in /usr/local/lib/python3.12/site-packages (from docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (0.25.2)\n", - "Requirement already satisfied: tree-sitter-python<1.0.0,>=0.23.6 in /usr/local/lib/python3.12/site-packages (from docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (0.25.0)\n", - "Requirement already satisfied: tree-sitter-c<1.0.0,>=0.23.4 in /usr/local/lib/python3.12/site-packages (from docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (0.24.1)\n", - "Requirement already satisfied: tree-sitter-java<1.0.0,>=0.23.5 in /usr/local/lib/python3.12/site-packages (from docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (0.23.5)\n", - "Requirement already satisfied: tree-sitter-javascript<1.0.0,>=0.23.1 in /usr/local/lib/python3.12/site-packages (from docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (0.25.0)\n", - "Requirement already satisfied: tree-sitter-typescript<1.0.0,>=0.23.2 in /usr/local/lib/python3.12/site-packages (from docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (0.23.2)\n", - "Requirement already satisfied: transformers<5.0.0,>=4.34.0 in /usr/local/lib/python3.12/site-packages (from docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (4.57.3)\n", - "Requirement already satisfied: torchvision<1,>=0 in /usr/local/lib/python3.12/site-packages (from docling-ibm-models<4,>=3.9.1->docling~=2.26->langchain-docling) (0.24.1)\n", - "Requirement already satisfied: jsonlines<5.0.0,>=3.1.0 in /usr/local/lib/python3.12/site-packages (from docling-ibm-models<4,>=3.9.1->docling~=2.26->langchain-docling) (4.0.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.12/site-packages (from 
huggingface_hub<1,>=0.23->docling~=2.26->langchain-docling) (3.20.0)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.12/site-packages (from huggingface_hub<1,>=0.23->docling~=2.26->langchain-docling) (2025.10.0)\n", - "Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/site-packages (from huggingface_hub<1,>=0.23->docling~=2.26->langchain-docling) (1.2.0)\n", - "Requirement already satisfied: attrs>=19.2.0 in /usr/local/lib/python3.12/site-packages (from jsonlines<5.0.0,>=3.1.0->docling-ibm-models<4,>=3.9.1->docling~=2.26->langchain-docling) (25.4.0)\n", - "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.12/site-packages (from jsonpatch<2.0.0,>=1.33.0->langchain-core) (3.0.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.12/site-packages (from jsonschema<5.0.0,>=4.16.0->docling-core<3.0.0,>=2.50.1->docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (2025.9.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.12/site-packages (from jsonschema<5.0.0,>=4.16.0->docling-core<3.0.0,>=2.50.1->docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (0.37.0)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.12/site-packages (from jsonschema<5.0.0,>=4.16.0->docling-core<3.0.0,>=2.50.1->docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (0.29.0)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core) (0.28.1)\n", - "Requirement already satisfied: orjson>=3.9.14 in /usr/local/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core) (3.11.4)\n", - "Requirement already satisfied: requests-toolbelt>=1.0.0 in /usr/local/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core) (1.0.0)\n", - 
"Requirement already satisfied: zstandard>=0.23.0 in /usr/local/lib/python3.12/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core) (0.25.0)\n", - "Requirement already satisfied: anyio in /usr/local/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core) (4.11.0)\n", - "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core) (1.0.9)\n", - "Requirement already satisfied: idna in /usr/local/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core) (3.11)\n", - "Requirement already satisfied: h11>=0.16 in /usr/local/lib/python3.12/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core) (0.16.0)\n", - "Requirement already satisfied: et-xmlfile in /usr/local/lib/python3.12/site-packages (from openpyxl<4.0.0,>=3.1.5->docling~=2.26->langchain-docling) (2.0.0)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.12/site-packages (from pandas<3.0.0,>=2.1.4->docling~=2.26->langchain-docling) (2.9.0.post0)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/site-packages (from pandas<3.0.0,>=2.1.4->docling~=2.26->langchain-docling) (2025.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/site-packages (from pandas<3.0.0,>=2.1.4->docling~=2.26->langchain-docling) (2025.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.41.5 in /usr/local/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core) (2.41.5)\n", - "Requirement already satisfied: typing-inspection>=0.4.2 in /usr/local/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core) (0.4.2)\n", - "Requirement already 
satisfied: XlsxWriter>=0.5.7 in /usr/local/lib/python3.12/site-packages (from python-pptx<2.0.0,>=1.0.2->docling~=2.26->langchain-docling) (3.2.9)\n", - "Requirement already satisfied: pyclipper>=1.2.0 in /usr/local/lib/python3.12/site-packages (from rapidocr<4.0.0,>=3.3->docling~=2.26->langchain-docling) (1.3.0.post6)\n", - "Requirement already satisfied: opencv-python>=4.5.1.48 in /usr/local/lib/python3.12/site-packages (from rapidocr<4.0.0,>=3.3->docling~=2.26->langchain-docling) (4.12.0.88)\n", - "Requirement already satisfied: six>=1.15.0 in /usr/local/lib/python3.12/site-packages (from rapidocr<4.0.0,>=3.3->docling~=2.26->langchain-docling) (1.17.0)\n", - "Requirement already satisfied: Shapely!=2.0.4,>=1.7.1 in /usr/local/lib/python3.12/site-packages (from rapidocr<4.0.0,>=3.3->docling~=2.26->langchain-docling) (2.1.2)\n", - "Requirement already satisfied: omegaconf in /usr/local/lib/python3.12/site-packages (from rapidocr<4.0.0,>=3.3->docling~=2.26->langchain-docling) (2.3.0)\n", - "Requirement already satisfied: colorlog in /usr/local/lib/python3.12/site-packages (from rapidocr<4.0.0,>=3.3->docling~=2.26->langchain-docling) (6.10.1)\n", - "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/site-packages (from requests<3.0.0,>=2.32.2->docling~=2.26->langchain-docling) (3.4.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/site-packages (from requests<3.0.0,>=2.32.2->docling~=2.26->langchain-docling) (2.5.0)\n", - "Requirement already satisfied: mpire[dill] in /usr/local/lib/python3.12/site-packages (from semchunk<3.0.0,>=2.2.0->docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (2.10.2)\n", - "Requirement already satisfied: setuptools in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (80.9.0)\n", - "Requirement already satisfied: sympy>=1.13.3 in /usr/local/lib/python3.12/site-packages (from 
torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (1.14.0)\n", - "Requirement already satisfied: networkx>=2.5.1 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (3.6)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (3.1.6)\n", - "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.8.93 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (12.8.93)\n", - "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.8.90 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (12.8.90)\n", - "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.8.90 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (12.8.90)\n", - "Requirement already satisfied: nvidia-cudnn-cu12==9.10.2.21 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (9.10.2.21)\n", - "Requirement already satisfied: nvidia-cublas-cu12==12.8.4.1 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (12.8.4.1)\n", - "Requirement already satisfied: nvidia-cufft-cu12==11.3.3.83 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (11.3.3.83)\n", - "Requirement already satisfied: nvidia-curand-cu12==10.3.9.90 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (10.3.9.90)\n", - "Requirement already satisfied: nvidia-cusolver-cu12==11.7.3.90 in /usr/local/lib/python3.12/site-packages (from 
torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (11.7.3.90)\n", - "Requirement already satisfied: nvidia-cusparse-cu12==12.5.8.93 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (12.5.8.93)\n", - "Requirement already satisfied: nvidia-cusparselt-cu12==0.7.1 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (0.7.1)\n", - "Requirement already satisfied: nvidia-nccl-cu12==2.27.5 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (2.27.5)\n", - "Requirement already satisfied: nvidia-nvshmem-cu12==3.3.20 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (3.3.20)\n", - "Requirement already satisfied: nvidia-nvtx-cu12==12.8.90 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (12.8.90)\n", - "Requirement already satisfied: nvidia-nvjitlink-cu12==12.8.93 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (12.8.93)\n", - "Requirement already satisfied: nvidia-cufile-cu12==1.13.1.3 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (1.13.1.3)\n", - "Requirement already satisfied: triton==3.5.1 in /usr/local/lib/python3.12/site-packages (from torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (3.5.1)\n", - "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.12/site-packages (from transformers<5.0.0,>=4.34.0->docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (2025.11.3)\n", - "Requirement already satisfied: tokenizers<=0.23.0,>=0.22.0 in /usr/local/lib/python3.12/site-packages (from 
transformers<5.0.0,>=4.34.0->docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (0.22.1)\n", - "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.12/site-packages (from typer<0.20.0,>=0.12.5->docling~=2.26->langchain-docling) (8.3.1)\n", - "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.12/site-packages (from typer<0.20.0,>=0.12.5->docling~=2.26->langchain-docling) (1.5.4)\n", - "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.12/site-packages (from typer<0.20.0,>=0.12.5->docling~=2.26->langchain-docling) (14.2.0)\n", - "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.12/site-packages (from sentence-transformers) (1.7.2)\n", - "Requirement already satisfied: grpcio!=1.68.0,!=1.68.1,!=1.69.0,!=1.70.0,!=1.70.1,!=1.71.0,!=1.72.1,!=1.73.0,>=1.66.2 in /usr/local/lib/python3.12/site-packages (from pymilvus[milvus_lite]) (1.76.0)\n", - "Requirement already satisfied: protobuf>=5.27.2 in /usr/local/lib/python3.12/site-packages (from pymilvus[milvus_lite]) (6.33.1)\n", - "Requirement already satisfied: milvus-lite>=2.4.0 in /usr/local/lib/python3.12/site-packages (from pymilvus[milvus_lite]) (2.5.1)\n", - "Requirement already satisfied: sqlalchemy<3.0.0,>=1.4.0 in /usr/local/lib/python3.12/site-packages (from langchain-classic) (2.0.44)\n", - "Requirement already satisfied: greenlet>=1 in /usr/local/lib/python3.12/site-packages (from sqlalchemy<3.0.0,>=1.4.0->langchain-classic) (3.2.4)\n", - "Requirement already satisfied: openai<3.0.0,>=1.109.1 in /usr/local/lib/python3.12/site-packages (from langchain-openai) (2.8.1)\n", - "Requirement already satisfied: tiktoken<1.0.0,>=0.7.0 in /usr/local/lib/python3.12/site-packages (from langchain-openai) (0.12.0)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.12/site-packages (from openai<3.0.0,>=1.109.1->langchain-openai) (1.9.0)\n", - "Requirement already satisfied: jiter<1,>=0.10.0 
in /usr/local/lib/python3.12/site-packages (from openai<3.0.0,>=1.109.1->langchain-openai) (0.12.0)\n", - "Requirement already satisfied: sniffio in /usr/local/lib/python3.12/site-packages (from openai<3.0.0,>=1.109.1->langchain-openai) (1.3.1)\n", - "Requirement already satisfied: faker>=5.0.0 in /usr/local/lib/python3.12/site-packages (from polyfactory>=2.22.2->docling~=2.26->langchain-docling) (38.2.0)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.12/site-packages (from rich>=10.11.0->typer<0.20.0,>=0.12.5->docling~=2.26->langchain-docling) (4.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.12/site-packages (from rich>=10.11.0->typer<0.20.0,>=0.12.5->docling~=2.26->langchain-docling) (2.19.2)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.12/site-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<0.20.0,>=0.12.5->docling~=2.26->langchain-docling) (0.1.2)\n", - "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.12/site-packages (from sympy>=1.13.3->torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (1.3.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.12/site-packages (from jinja2->torch>=2.0.0->accelerate<2,>=1.0.0->docling~=2.26->langchain-docling) (3.0.3)\n", - "Requirement already satisfied: multiprocess>=0.70.15 in /usr/local/lib/python3.12/site-packages (from mpire[dill]->semchunk<3.0.0,>=2.2.0->docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (0.70.18)\n", - "Requirement already satisfied: dill>=0.4.0 in /usr/local/lib/python3.12/site-packages (from multiprocess>=0.70.15->mpire[dill]->semchunk<3.0.0,>=2.2.0->docling-core[chunking]<3.0.0,>=2.50.1->docling~=2.26->langchain-docling) (0.4.0)\n", - "Requirement already satisfied: antlr4-python3-runtime==4.9.* in /usr/local/lib/python3.12/site-packages (from 
omegaconf->rapidocr<4.0.0,>=3.3->docling~=2.26->langchain-docling) (4.9.3)\n", - "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.12/site-packages (from scikit-learn->sentence-transformers) (1.5.2)\n", - "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.12/site-packages (from scikit-learn->sentence-transformers) (3.6.0)\n", - "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning.\u001b[0m\u001b[33m\n", - "\u001b[0m" + "\u001b[2mAudited \u001b[1m11 packages\u001b[0m \u001b[2min 98ms\u001b[0m\u001b[0m\n" ] } ], "source": [ - "!uv pip install langchain-docling langchain-core langchain-huggingface sentence-transformers langchain_milvus \"pymilvus[milvus_lite]\" langchain-text-splitters langchain-classic langchain-openai python-dotenv" + "!uv pip install langchain-docling langchain-core langchain-huggingface sentence-transformers langchain_milvus langchain-ibm \"pymilvus[milvus_lite]\" langchain-text-splitters langchain-classic langchain-openai python-dotenv" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + "/Users/dol/codes/docling-workshops/workshops/2025_12_04/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } @@ -192,31 +48,13 @@ "source": [ "import logging\n", "import os\n", - "import requests\n", "\n", - "from dotenv import load_dotenv\n", "from langchain_core.prompts import PromptTemplate\n", "\n", - "load_dotenv()\n", - "api_key = os.environ.get(\"WX_API_KEY\")\n", - "project_id = os.environ.get(\"WX_PROJECT_ID\")\n", - "\n", - "\n", "logging.basicConfig(level=logging.ERROR)\n", "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# to check that your .env file has been read correctly\n", - "print(api_key)\n", - "print(project_id)" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -276,7 +114,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### getting a token to access watsonx.ai models" + "### Choice of LLM runtime\n", + "\n", + "In the Generation step of the RAG pipeline we will invoke a model. Below are a few possibilities for defining the LLM:\n", + "\n", + "1. Using a local LLM engine, e.g. LM Studio, Ollama, etc. See the `get_generic_openai_api_llm()` method.\n", + "2. Using a remote LLM inference server, e.g. watsonx.ai. In this case you might need credentials. See the `get_watsonx_llm()` method." 
] }, { @@ -285,28 +128,13 @@ "metadata": {}, "outputs": [], "source": [ + "# Default parameters match to a local LM Studio instance\n", "\n", - "from langchain_ibm import ChatWatsonx\n", - "\n", - "def _get_iam_access_token(api_key: str) -> str:\n", - " res = requests.post(\n", - " url=\"https://iam.cloud.ibm.com/identity/token\",\n", - " headers={\n", - " \"Content-Type\": \"application/x-www-form-urlencoded\",\n", - " },\n", - " data=f\"grant_type=urn:ibm:params:oauth:grant-type:apikey&apikey={api_key}\",\n", - " )\n", - " res.raise_for_status()\n", - " api_out = res.json()\n", - " #print(f\"{api_out=}\")\n", - " return api_out[\"access_token\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### setting the parameters to access the wx.ai model" + "def get_generic_openai_api_llm(lm_model_id=\"ibm/granite-4-h-small\", lm_base_url=\"http://localhost:1234/v1\", lm_api_key=\"none\"):\n", + " from langchain_openai import ChatOpenAI \n", + " \n", + " llm = ChatOpenAI(model=lm_model_id, base_url=lm_base_url, api_key=lm_api_key)\n", + " return llm" ] }, { @@ -315,15 +143,39 @@ "metadata": {}, "outputs": [], "source": [ - "model_id = \"ibm/granite-4-h-small\"\n", - "base_url = \"https://us-south.ml.cloud.ibm.com\"" + "def get_watsonx_llm():\n", + " model_id = \"ibm/granite-4-h-small\"\n", + " base_url = \"https://us-south.ml.cloud.ibm.com\"\n", + "\n", + " from langchain_ibm import ChatWatsonx\n", + " from dotenv import load_dotenv\n", + "\n", + " load_dotenv()\n", + " api_key = os.environ.get(\"WX_API_KEY\")\n", + " project_id = os.environ.get(\"WX_PROJECT_ID\")\n", + " if api_key is None or project_id is None:\n", + " raise RuntimeError(\"An API key for watsonx is required to run this part of the notebook. 
Please set WX_API_KEY and WX_PROJECT_ID in the .env file.\")\n", + "\n", + " generation_params = {\n", + " \"temperature\": 0.7, # 0.0 (deterministic) to 1.0 (creative)\n", + " \"max_tokens\": 1000, # Maximum output length\n", + " \"top_p\": 0.9, # Nucleus sampling threshold\n", + " }\n", + " llm = ChatWatsonx(\n", + " model_id=model_id,\n", + " url=base_url,\n", + " project_id=project_id,\n", + " apikey=api_key,\n", + " params=generation_params # Pass the structured params\n", + " )\n", + " return llm\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### building the RAG pipeline" + "### Defining the RAG pipeline" ] }, { @@ -334,29 +186,15 @@ "source": [ "from langchain_classic.chains import create_retrieval_chain\n", "from langchain_classic.chains.combine_documents import create_stuff_documents_chain\n", - "from langchain_ibm import ChatWatsonx\n", "\n", "def clip_text(text, limit=100):\n", " return f\"{text[:limit]}...\" if len(text) > limit else text\n", "\n", - "def do_rag(*, retriever, question, lm_model_id, lm_prompt, lm_base_url=\"http://localhost:1234/v1\", lm_api_key=\"none\"):\n", - " generation_params = {\n", - " \"temperature\": 0.7, # 0.0 (deterministic) to 1.0 (creative)\n", - " \"max_tokens\": 1000, # Maximum output length\n", - " \"top_p\": 0.9, # Nucleus sampling threshold\n", - " }\n", - " llm = ChatWatsonx(\n", - " model_id=model_id,\n", - " url=base_url,\n", - " project_id=project_id,\n", - " apikey=api_key,\n", - " params=generation_params # Pass the structured params\n", - " ) \n", + "def do_rag(*, retriever, question, llm, lm_prompt):\n", " question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=lm_prompt)\n", " rag_chain = create_retrieval_chain(retriever, question_answer_chain)\n", " resp_dict = rag_chain.invoke({\"input\": question})\n", "\n", - "\n", " print(f\"Question:\\n{resp_dict['input']}\\n\\nAnswer:\\n{clip_text(resp_dict['answer'], limit=1000)}\")\n", " return resp_dict\n", "\n", @@ -394,17 +232,8 @@ 
"name": "stderr", "output_type": "stream", "text": [ - "\u001b[32m[INFO] 2025-11-28 17:08:18,376 [RapidOCR] base.py:22: Using engine_name: torch\u001b[0m\n", - "\u001b[32m[INFO] 2025-11-28 17:08:18,435 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_det_infer.pth\u001b[0m\n", - "\u001b[32m[INFO] 2025-11-28 17:08:18,436 [RapidOCR] torch.py:54: Using /usr/local/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_det_infer.pth\u001b[0m\n", - "\u001b[32m[INFO] 2025-11-28 17:08:18,696 [RapidOCR] base.py:22: Using engine_name: torch\u001b[0m\n", - "\u001b[32m[INFO] 2025-11-28 17:08:18,704 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth\u001b[0m\n", - "\u001b[32m[INFO] 2025-11-28 17:08:18,705 [RapidOCR] torch.py:54: Using /usr/local/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth\u001b[0m\n", - "\u001b[32m[INFO] 2025-11-28 17:08:18,806 [RapidOCR] base.py:22: Using engine_name: torch\u001b[0m\n", - "\u001b[32m[INFO] 2025-11-28 17:08:18,873 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth\u001b[0m\n", - "\u001b[32m[INFO] 2025-11-28 17:08:18,874 [RapidOCR] torch.py:54: Using /usr/local/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth\u001b[0m\n", "Token indices sequence length is longer than the specified maximum sequence length for this model (619 > 512). Running this sequence through the model will result in indexing errors\n", - "/usr/local/lib/python3.12/site-packages/milvus_lite/__init__.py:15: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. 
Refrain from using this package or pin to Setuptools<81.\n", + "/Users/dol/codes/docling-workshops/workshops/2025_12_04/.venv/lib/python3.12/site-packages/milvus_lite/__init__.py:15: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n", " from pkg_resources import DistributionNotFound, get_distribution\n" ] } @@ -440,8 +269,9 @@ "Answer:\n", "Based on the provided context, the two main AI models used in Docling are:\n", "\n", - "1. Layout analysis model - an accurate object-detector for page elements\n", - "2. TableFormer - a state-of-the-art table structure recognition model\n" + "1. Layout analysis model - an accurate object-detector for page elements.\n", + "\n", + "2. TableFormer - a state-of-the-art table structure recognition model.\n" ] } ], @@ -449,7 +279,13 @@ "rag_result = do_rag(\n", " retriever=retriever,\n", " question=\"Briefly name the main AI models used in Docling.\",\n", - " lm_model_id=\"ibm/granite-4-h-small\",\n", + "\n", + " # using a local model\n", + " # llm=get_generic_openai_api_llm(),\n", + "\n", + " # using watsonx.ai\n", + " llm=get_watsonx_llm(),\n", + "\n", " lm_prompt=PromptTemplate.from_template(\"Context information is below.\\n---------------------\\n{context}\\n---------------------\\nGiven the context information and not prior knowledge, answer the query.\\nQuery: {input}\\nAnswer:\\n\"),\n", ")" ] From f58270ccf03f2ada18102185e0530cf8914e46f3 Mon Sep 17 00:00:00 2001 From: nsapin34 Date: Tue, 2 Dec 2025 18:08:35 +0100 Subject: [PATCH 3/3] Update docling_lab_2.ipynb Signed-off-by: nsapin34 --- workshops/2025_12_04/docling_lab_2.ipynb | 142 ++++++++++++----------- 1 file changed, 75 insertions(+), 67 deletions(-) diff --git a/workshops/2025_12_04/docling_lab_2.ipynb b/workshops/2025_12_04/docling_lab_2.ipynb index 5bb8f08..80f55ae 100644 --- 
a/workshops/2025_12_04/docling_lab_2.ipynb +++ b/workshops/2025_12_04/docling_lab_2.ipynb @@ -23,12 +23,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[2mAudited \u001b[1m11 packages\u001b[0m \u001b[2min 98ms\u001b[0m\u001b[0m\n" + "/usr/bin/sh: 1: uv: not found\n" ] } ], "source": [ - "!uv pip install langchain-docling langchain-core langchain-huggingface sentence-transformers langchain_milvus langchain-ibm \"pymilvus[milvus_lite]\" langchain-text-splitters langchain-classic langchain-openai python-dotenv" + "!uv pip install langchain-docling langchain-core langchain-huggingface sentence-transformers langchain_milvus \"pymilvus[milvus_lite]\" langchain-text-splitters langchain-classic langchain-openai python-dotenv langchain_ibm" ] }, { @@ -40,7 +40,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/dol/codes/docling-workshops/workshops/2025_12_04/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + "/usr/local/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } @@ -48,13 +48,31 @@ "source": [ "import logging\n", "import os\n", + "import requests\n", "\n", + "from dotenv import load_dotenv\n", "from langchain_core.prompts import PromptTemplate\n", "\n", + "load_dotenv()\n", + "api_key = os.environ.get(\"WX_API_KEY\")\n", + "project_id = os.environ.get(\"WX_PROJECT_ID\")\n", + "\n", + "\n", "logging.basicConfig(level=logging.ERROR)\n", "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# to check that your .env file has been read correctly\n", + "print(api_key)\n", + "print(project_id)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -64,7 +82,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -114,27 +132,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Choice of LLM runtime\n", - "\n", - "In the Generation step of the RAG pipeline we will invoke a model. Below are a few possibilties for defining the LLM:\n", - "\n", - "1. Using a local LLM engine, e.g. LM Studio, Ollama, etc. See the `get_generic_openai_api_llm()` method.\n", - "2. Using a remote LLM inference server, e.g. watsonx.ai. In this case you will might need credentials. See the `get_watsonx_llm()` method." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# Default parameters match to a local LM Studio instance\n", - "\n", - "def get_generic_openai_api_llm(lm_model_id=\"ibm/granite-4-h-small\", lm_base_url=\"http://localhost:1234/v1\", lm_api_key=\"none\"):\n", - " from langchain_openai import ChatOpenAI \n", - " \n", - " llm = ChatOpenAI(model=lm_model_id, base_url=lm_base_url, api_key=lm_api_key)\n", - " return llm" + "#### setting the parameters to access the wx.ai model" ] }, { @@ -143,39 +141,17 @@ "metadata": {}, "outputs": [], "source": [ - "def get_watsonx_llm():\n", - " model_id = \"ibm/granite-4-h-small\"\n", - " base_url = \"https://us-south.ml.cloud.ibm.com\"\n", - "\n", - " from langchain_ibm import ChatWatsonx\n", - " from dotenv import load_dotenv\n", - "\n", - " load_dotenv()\n", - " api_key = os.environ.get(\"WX_API_KEY\")\n", - " project_id = os.environ.get(\"WX_PROJECT_ID\")\n", - " if api_key is None or project_id is None:\n", - " raise RuntimeError(\"An API key for watsonx is required to run this part of the notebook. 
Please set WX_API_KEY and WX_PROJECT_ID in the .env file.\")\n", + "from langchain_ibm import ChatWatsonx\n", "\n", - " generation_params = {\n", - " \"temperature\": 0.7, # 0.0 (deterministic) to 1.0 (creative)\n", - " \"max_tokens\": 1000, # Maximum output length\n", - " \"top_p\": 0.9, # Nucleus sampling threshold\n", - " }\n", - " llm = ChatWatsonx(\n", - " model_id=model_id,\n", - " url=base_url,\n", - " project_id=project_id,\n", - " apikey=api_key,\n", - " params=generation_params # Pass the structured params\n", - " )\n", - " return llm\n" + "model_id = \"ibm/granite-4-h-small\"\n", + "base_url = \"https://us-south.ml.cloud.ibm.com\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Defining the RAG pipeline" + "#### building the RAG pipeline" ] }, { @@ -186,15 +162,29 @@ "source": [ "from langchain_classic.chains import create_retrieval_chain\n", "from langchain_classic.chains.combine_documents import create_stuff_documents_chain\n", + "from langchain_ibm import ChatWatsonx\n", "\n", "def clip_text(text, limit=100):\n", " return f\"{text[:limit]}...\" if len(text) > limit else text\n", "\n", - "def do_rag(*, retriever, question, llm, lm_prompt):\n", + "def do_rag(*, retriever, question, lm_model_id, lm_prompt, lm_base_url=\"http://localhost:1234/v1\", lm_api_key=\"none\"):\n", + " generation_params = {\n", + " \"temperature\": 0.7, # 0.0 (deterministic) to 1.0 (creative)\n", + " \"max_tokens\": 1000, # Maximum output length\n", + " \"top_p\": 0.9, # Nucleus sampling threshold\n", + " }\n", + " llm = ChatWatsonx(\n", + " model_id=model_id,\n", + " url=base_url,\n", + " project_id=project_id,\n", + " apikey=api_key,\n", + " params=generation_params # Pass the structured params\n", + " ) \n", " question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=lm_prompt)\n", " rag_chain = create_retrieval_chain(retriever, question_answer_chain)\n", " resp_dict = rag_chain.invoke({\"input\": question})\n", "\n", + "\n", " 
print(f\"Question:\\n{resp_dict['input']}\\n\\nAnswer:\\n{clip_text(resp_dict['answer'], limit=1000)}\")\n", " return resp_dict\n", "\n", @@ -225,15 +215,33 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ + "\u001b[32m[INFO] 2025-12-02 17:01:05,967 [RapidOCR] base.py:22: Using engine_name: torch\u001b[0m\n", + "\u001b[32m[INFO] 2025-12-02 17:01:05,978 [RapidOCR] download_file.py:68: Initiating download: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.4.0/torch/PP-OCRv4/det/ch_PP-OCRv4_det_infer.pth\u001b[0m\n", + "\u001b[32m[INFO] 2025-12-02 17:01:06,765 [RapidOCR] download_file.py:82: Download size: 13.83MB\u001b[0m\n", + "\u001b[32m[INFO] 2025-12-02 17:01:07,396 [RapidOCR] download_file.py:95: Successfully saved to: /usr/local/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_det_infer.pth\u001b[0m\n", + "\u001b[32m[INFO] 2025-12-02 17:01:07,399 [RapidOCR] torch.py:54: Using /usr/local/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_det_infer.pth\u001b[0m\n", + "\u001b[32m[INFO] 2025-12-02 17:01:07,823 [RapidOCR] base.py:22: Using engine_name: torch\u001b[0m\n", + "\u001b[32m[INFO] 2025-12-02 17:01:07,824 [RapidOCR] download_file.py:68: Initiating download: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.4.0/torch/PP-OCRv4/cls/ch_ptocr_mobile_v2.0_cls_infer.pth\u001b[0m\n", + "\u001b[32m[INFO] 2025-12-02 17:01:08,824 [RapidOCR] download_file.py:82: Download size: 0.56MB\u001b[0m\n", + "\u001b[32m[INFO] 2025-12-02 17:01:08,881 [RapidOCR] download_file.py:95: Successfully saved to: /usr/local/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth\u001b[0m\n", + "\u001b[32m[INFO] 2025-12-02 17:01:08,883 [RapidOCR] torch.py:54: Using /usr/local/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth\u001b[0m\n", + "\u001b[32m[INFO] 2025-12-02 17:01:08,947 [RapidOCR] 
base.py:22: Using engine_name: torch\u001b[0m\n", + "\u001b[32m[INFO] 2025-12-02 17:01:08,947 [RapidOCR] download_file.py:68: Initiating download: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.4.0/torch/PP-OCRv4/rec/ch_PP-OCRv4_rec_infer.pth\u001b[0m\n", + "\u001b[32m[INFO] 2025-12-02 17:01:09,855 [RapidOCR] download_file.py:82: Download size: 25.67MB\u001b[0m\n", + "\u001b[32m[INFO] 2025-12-02 17:01:10,924 [RapidOCR] download_file.py:95: Successfully saved to: /usr/local/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth\u001b[0m\n", + "\u001b[32m[INFO] 2025-12-02 17:01:10,928 [RapidOCR] torch.py:54: Using /usr/local/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth\u001b[0m\n", + "\u001b[32m[INFO] 2025-12-02 17:02:31,866 [RapidOCR] download_file.py:68: Initiating download: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.4.0/resources/fonts/FZYTK.TTF\u001b[0m\n", + "\u001b[32m[INFO] 2025-12-02 17:02:32,811 [RapidOCR] download_file.py:82: Download size: 3.09MB\u001b[0m\n", + "\u001b[32m[INFO] 2025-12-02 17:02:32,979 [RapidOCR] download_file.py:95: Successfully saved to: /usr/local/lib/python3.12/site-packages/rapidocr/models/FZYTK.TTF\u001b[0m\n", "Token indices sequence length is longer than the specified maximum sequence length for this model (619 > 512). Running this sequence through the model will result in indexing errors\n", - "/Users/dol/codes/docling-workshops/workshops/2025_12_04/.venv/lib/python3.12/site-packages/milvus_lite/__init__.py:15: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n", + "/usr/local/lib/python3.12/site-packages/milvus_lite/__init__.py:15: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. 
The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n", " from pkg_resources import DistributionNotFound, get_distribution\n" ] } @@ -256,7 +264,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -269,9 +277,8 @@ "Answer:\n", "Based on the provided context, the two main AI models used in Docling are:\n", "\n", - "1. Layout analysis model - an accurate object-detector for page elements.\n", - "\n", - "2. TableFormer - a state-of-the-art table structure recognition model.\n" + "1. Layout analysis model - an accurate object-detector for page elements\n", + "2. TableFormer - a state-of-the-art table structure recognition model\n" ] } ], @@ -279,20 +286,14 @@ "rag_result = do_rag(\n", " retriever=retriever,\n", " question=\"Briefly name the main AI models used in Docling.\",\n", - "\n", - " # using a local model\n", - " # llm=get_generic_openai_api_llm(),\n", - "\n", - " # using watsonx.ai\n", - " llm=get_watsonx_llm(),\n", - "\n", + " lm_model_id=\"ibm/granite-4-h-small\",\n", " lm_prompt=PromptTemplate.from_template(\"Context information is below.\\n---------------------\\n{context}\\n---------------------\\nGiven the context information and not prior knowledge, answer the query.\\nQuery: {input}\\nAnswer:\\n\"),\n", ")" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -320,6 +321,13 @@ "source": [ "print_sources(rag_result)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -338,7 +346,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.10" + "version": "3.12.12" } }, "nbformat": 4,