diff --git a/docs/faq/faq-oss.mdx b/docs/faq/faq-oss.mdx index 71675ed..ce1b1be 100644 --- a/docs/faq/faq-oss.mdx +++ b/docs/faq/faq-oss.mdx @@ -46,7 +46,20 @@ For large-scale (>1M) or higher dimension vectors, it is beneficial to create a ### How can I speed up data inserts? -It's highly recommended to perform bulk inserts via batches (for e.g., Pandas DataFrames or lists of dicts in Python) to speed up inserts for large datasets. Inserting records one at a time is slow and can result in suboptimal performance because each insert creates a new data fragment on disk. Batching inserts allows LanceDB to create larger fragments (and their associated manifests), which are more efficient to read and write. +LanceDB auto-parallelizes large writes when you call `table.add()` with materialized +data such as `pa.Table`, `pd.DataFrame`, or `pa.dataset()`. No extra configuration +is needed — writes are automatically split into partitions of ~1M rows or 2GB. + +For best results: + +- **Create an empty table first**, then call `table.add()`. The `add()` path enables + automatic write parallelism, while passing data directly to `create_table()` does not. +- **For file-based data**, use `pyarrow.dataset.dataset("path/to/data/", format="parquet")` + so LanceDB can stream from disk without loading everything into memory. +- **Avoid inserting one row at a time.** Each insert creates a new data fragment on + disk. Batch your data into Arrow tables or DataFrames, or use iterators. + +See [Loading Large Datasets](/tables/create#loading-large-datasets) for full examples. ### Do I need to set a refine factor when using an index? 
diff --git a/docs/snippets/quickstart.mdx b/docs/snippets/quickstart.mdx index bd817e6..f4272aa 100644 --- a/docs/snippets/quickstart.mdx +++ b/docs/snippets/quickstart.mdx @@ -18,6 +18,8 @@ export const PyQuickstartVectorSearch1Async = "# Let's search for vectors simila export const PyQuickstartVectorSearch2 = "# Let's search for vectors similar to \"wizard\"\nquery_vector = [0.7, 0.3, 0.5]\n\nresults = table.search(query_vector).limit(2).to_polars()\nprint(results)\n"; +export const TsQuickstartOutputPandas = "result = await table.search(queryVector).limit(2).toArray();\n"; + export const TsQuickstartAddData = "const moreData = [\n { id: \"7\", text: \"mage\", vector: [0.6, 0.3, 0.4] },\n { id: \"8\", text: \"bard\", vector: [0.3, 0.8, 0.4] },\n];\n\n// Add data to table\nawait table.add(moreData);\n"; export const TsQuickstartCreateTable = "const data = [\n { id: \"1\", text: \"knight\", vector: [0.9, 0.4, 0.8] },\n { id: \"2\", text: \"ranger\", vector: [0.8, 0.4, 0.7] },\n { id: \"9\", text: \"priest\", vector: [0.6, 0.2, 0.6] },\n { id: \"4\", text: \"rogue\", vector: [0.7, 0.4, 0.7] },\n];\nlet table = await db.createTable(\"adventurers\", data, { mode: \"overwrite\" });\n"; diff --git a/docs/snippets/tables.mdx b/docs/snippets/tables.mdx index fdf5ed7..f9d48d1 100644 --- a/docs/snippets/tables.mdx +++ b/docs/snippets/tables.mdx @@ -12,6 +12,8 @@ export const PyAddDataPydanticModel = "from lancedb.pydantic import LanceModel, export const PyAddDataToTable = "import pyarrow as pa\n\n# create an empty table with schema\ndata = [\n {\"vector\": [3.1, 4.1], \"item\": \"foo\", \"price\": 10.0},\n {\"vector\": [5.9, 26.5], \"item\": \"bar\", \"price\": 20.0},\n {\"vector\": [10.2, 100.8], \"item\": \"baz\", \"price\": 30.0},\n {\"vector\": [1.4, 9.5], \"item\": \"fred\", \"price\": 40.0},\n]\n\nschema = pa.schema(\n [\n pa.field(\"vector\", pa.list_(pa.float32(), 2)),\n pa.field(\"item\", pa.utf8()),\n pa.field(\"price\", pa.float32()),\n ]\n)\n\ntable_name = 
\"basic_ingestion_example\"\ntable = db.create_table(table_name, schema=schema, mode=\"overwrite\")\n# Add data\ntable.add(data)\n"; +export const PyAddFromDataset = "import pyarrow.dataset as ds\n\ndataset = ds.dataset(data_path, format=\"parquet\")\ndb = tmp_db\ntable = db.create_table(\"my_table\", schema=dataset.schema, mode=\"overwrite\")\ntable.add(dataset)\n"; + export const PyAlterColumnsDataType = "# Change price from int32 to int64 for larger numbers\ntable.alter_columns({\"path\": \"price\", \"data_type\": pa.int64()})\n"; export const PyAlterColumnsMultiple = "# Rename, change type, and make nullable in one operation\ntable.alter_columns(\n {\n \"path\": \"sale_price\",\n \"rename\": \"final_price\",\n \"data_type\": pa.float64(),\n \"nullable\": True,\n }\n)\n"; @@ -24,13 +26,13 @@ export const PyAlterColumnsWithExpression = "# For custom transforms, create a n export const PyAlterVectorColumn = "vector_dim = 768 # Your embedding dimension\ntable_name = \"vector_alter_example\"\ndb = tmp_db\ndata = [\n {\n \"id\": 1,\n \"embedding\": np.random.random(vector_dim).tolist(),\n },\n]\ntable = db.create_table(table_name, data, mode=\"overwrite\")\n\ntable.alter_columns(\n dict(path=\"embedding\", data_type=pa.list_(pa.float32(), vector_dim))\n)\n"; -export const PyBatchDataInsertion = "import pyarrow as pa\n\ndef make_batches():\n for i in range(5): # Create 5 batches\n yield pa.RecordBatch.from_arrays(\n [\n pa.array([[3.1, 4.1], [5.9, 26.5]], pa.list_(pa.float32(), 2)),\n pa.array([f\"item{i*2+1}\", f\"item{i*2+2}\"]),\n pa.array([float((i * 2 + 1) * 10), float((i * 2 + 2) * 10)]),\n ],\n [\"vector\", \"item\", \"price\"],\n )\n\nschema = pa.schema(\n [\n pa.field(\"vector\", pa.list_(pa.float32(), 2)),\n pa.field(\"item\", pa.utf8()),\n pa.field(\"price\", pa.float32()),\n ]\n)\n# Create table with batches\ntable_name = \"batch_ingestion_example\"\ntable = db.create_table(table_name, make_batches(), schema=schema, mode=\"overwrite\")\n"; +export const 
PyBatchDataInsertion = "import pyarrow as pa\n\ndef make_batches():\n for i in range(5): # Create 5 batches\n yield pa.RecordBatch.from_arrays(\n [\n pa.array([[3.1, 4.1], [5.9, 26.5]], pa.list_(pa.float32(), 2)),\n pa.array([f\"item{i * 2 + 1}\", f\"item{i * 2 + 2}\"]),\n pa.array([float((i * 2 + 1) * 10), float((i * 2 + 2) * 10)]),\n ],\n [\"vector\", \"item\", \"price\"],\n )\n\nschema = pa.schema(\n [\n pa.field(\"vector\", pa.list_(pa.float32(), 2)),\n pa.field(\"item\", pa.utf8()),\n pa.field(\"price\", pa.float32()),\n ]\n)\n# Create table with batches\ntable_name = \"batch_ingestion_example\"\ntable = db.create_table(table_name, make_batches(), schema=schema, mode=\"overwrite\")\n"; -export const PyConsistencyCheckoutLatest = "uri = str(tmp_db.uri)\nwriter_db = lancedb.connect(uri)\nreader_db = lancedb.connect(uri)\nwriter_table = writer_db.create_table(\"consistency_checkout_latest_table\", [{\"id\": 1}], mode=\"overwrite\")\nreader_table = reader_db.open_table(\"consistency_checkout_latest_table\")\n\nwriter_table.add([{\"id\": 2}])\nrows_before_refresh = reader_table.count_rows()\nprint(f\"Rows before checkout_latest: {rows_before_refresh}\")\n\nreader_table.checkout_latest()\nrows_after_refresh = reader_table.count_rows()\nprint(f\"Rows after checkout_latest: {rows_after_refresh}\")\n"; +export const PyConsistencyCheckoutLatest = "uri = str(tmp_db.uri)\nwriter_db = lancedb.connect(uri)\nreader_db = lancedb.connect(uri)\nwriter_table = writer_db.create_table(\n \"consistency_checkout_latest_table\", [{\"id\": 1}], mode=\"overwrite\"\n)\nreader_table = reader_db.open_table(\"consistency_checkout_latest_table\")\n\nwriter_table.add([{\"id\": 2}])\nrows_before_refresh = reader_table.count_rows()\nprint(f\"Rows before checkout_latest: {rows_before_refresh}\")\n\nreader_table.checkout_latest()\nrows_after_refresh = reader_table.count_rows()\nprint(f\"Rows after checkout_latest: {rows_after_refresh}\")\n"; -export const PyConsistencyEventual = "from datetime 
import timedelta\n\nuri = str(tmp_db.uri)\nwriter_db = lancedb.connect(uri)\nreader_db = lancedb.connect(uri, read_consistency_interval=timedelta(seconds=3600))\nwriter_table = writer_db.create_table(\"consistency_eventual_table\", [{\"id\": 1}], mode=\"overwrite\")\nreader_table = reader_db.open_table(\"consistency_eventual_table\")\nwriter_table.add([{\"id\": 2}])\nrows_after_write = reader_table.count_rows()\nprint(f\"Rows visible before eventual refresh interval: {rows_after_write}\")\n"; +export const PyConsistencyEventual = "from datetime import timedelta\n\nuri = str(tmp_db.uri)\nwriter_db = lancedb.connect(uri)\nreader_db = lancedb.connect(uri, read_consistency_interval=timedelta(seconds=3600))\nwriter_table = writer_db.create_table(\n \"consistency_eventual_table\", [{\"id\": 1}], mode=\"overwrite\"\n)\nreader_table = reader_db.open_table(\"consistency_eventual_table\")\nwriter_table.add([{\"id\": 2}])\nrows_after_write = reader_table.count_rows()\nprint(f\"Rows visible before eventual refresh interval: {rows_after_write}\")\n"; -export const PyConsistencyStrong = "from datetime import timedelta\n\nuri = str(tmp_db.uri)\nwriter_db = lancedb.connect(uri)\nreader_db = lancedb.connect(uri, read_consistency_interval=timedelta(0))\nwriter_table = writer_db.create_table(\"consistency_strong_table\", [{\"id\": 1}], mode=\"overwrite\")\nreader_table = reader_db.open_table(\"consistency_strong_table\")\nwriter_table.add([{\"id\": 2}])\nrows_after_write = reader_table.count_rows()\nprint(f\"Rows visible with strong consistency: {rows_after_write}\")\n"; +export const PyConsistencyStrong = "from datetime import timedelta\n\nuri = str(tmp_db.uri)\nwriter_db = lancedb.connect(uri)\nreader_db = lancedb.connect(uri, read_consistency_interval=timedelta(0))\nwriter_table = writer_db.create_table(\n \"consistency_strong_table\", [{\"id\": 1}], mode=\"overwrite\"\n)\nreader_table = reader_db.open_table(\"consistency_strong_table\")\nwriter_table.add([{\"id\": 
2}])\nrows_after_write = reader_table.count_rows()\nprint(f\"Rows visible with strong consistency: {rows_after_write}\")\n"; export const PyCreateEmptyTable = "import pyarrow as pa\n\nschema = pa.schema(\n [\n pa.field(\"vector\", pa.list_(pa.float32(), 2)),\n pa.field(\"item\", pa.string()),\n pa.field(\"price\", pa.float32()),\n ]\n)\ndb = tmp_db\ntbl = db.create_table(\"test_empty_table\", schema=schema, mode=\"overwrite\")\n"; @@ -60,11 +62,11 @@ export const PyDropColumnsSingle = "# Remove the first temporary column\ntable.d export const PyDropTable = "db = tmp_db\n# Create a table first\ndata = [{\"vector\": [1.1, 1.2], \"lat\": 45.5}]\ndb.create_table(\"my_table\", data, mode=\"overwrite\")\n\n# Drop the table\ndb.drop_table(\"my_table\")\n"; -export const PyInsertIfNotExists = "import pyarrow as pa\n\ntable = db.create_table(\n \"users_example\",\n data=pa.table(\n {\n \"id\": [1, 2],\n \"name\": [\"Alice\", \"Bob\"],\n \"login_count\": [10, 20],\n }\n ),\n mode=\"overwrite\",\n)\n\nincoming_users = pa.table(\n {\n \"id\": [2, 3],\n \"name\": [\"Bobby\", \"Charlie\"],\n \"login_count\": [21, 5],\n }\n)\n\n(\n table.merge_insert(\"id\")\n .when_not_matched_insert_all()\n .execute(incoming_users)\n)\n"; +export const PyInsertIfNotExists = "import pyarrow as pa\n\ntable = db.create_table(\n \"users_example\",\n data=pa.table(\n {\n \"id\": [1, 2],\n \"name\": [\"Alice\", \"Bob\"],\n \"login_count\": [10, 20],\n }\n ),\n mode=\"overwrite\",\n)\n\nincoming_users = pa.table(\n {\n \"id\": [2, 3],\n \"name\": [\"Bobby\", \"Charlie\"],\n \"login_count\": [21, 5],\n }\n)\n\n(table.merge_insert(\"id\").when_not_matched_insert_all().execute(incoming_users))\n"; export const PyMergeDeleteMissingBySource = "import pyarrow as pa\n\ntable = db.create_table(\n \"users_example\",\n data=pa.table(\n {\n \"id\": [1, 2, 3],\n \"name\": [\"Alice\", \"Bob\", \"Charlie\"],\n \"login_count\": [10, 20, 5],\n }\n ),\n mode=\"overwrite\",\n)\n\nincoming_users = pa.table(\n {\n 
\"id\": [2, 3],\n \"name\": [\"Bobby\", \"Charlie\"],\n \"login_count\": [21, 5],\n }\n)\n\n(\n table.merge_insert(\"id\")\n .when_matched_update_all()\n .when_not_matched_insert_all()\n .when_not_matched_by_source_delete()\n .execute(incoming_users)\n)\n"; -export const PyMergeMatchedUpdateOnly = "import pyarrow as pa\n\ntable = db.create_table(\n \"users_example\",\n data=pa.table(\n {\n \"id\": [1, 2],\n \"name\": [\"Alice\", \"Bob\"],\n \"login_count\": [10, 20],\n }\n ),\n mode=\"overwrite\",\n)\n\nincoming_users = pa.table(\n {\n \"id\": [2, 3],\n \"name\": [\"Bobby\", \"Charlie\"],\n \"login_count\": [21, 5],\n }\n)\n\n(\n table.merge_insert(\"id\")\n .when_matched_update_all()\n .execute(incoming_users)\n)\n"; +export const PyMergeMatchedUpdateOnly = "import pyarrow as pa\n\ntable = db.create_table(\n \"users_example\",\n data=pa.table(\n {\n \"id\": [1, 2],\n \"name\": [\"Alice\", \"Bob\"],\n \"login_count\": [10, 20],\n }\n ),\n mode=\"overwrite\",\n)\n\nincoming_users = pa.table(\n {\n \"id\": [2, 3],\n \"name\": [\"Bobby\", \"Charlie\"],\n \"login_count\": [21, 5],\n }\n)\n\n(table.merge_insert(\"id\").when_matched_update_all().execute(incoming_users))\n"; export const PyMergePartialColumns = "import pyarrow as pa\n\ntable = db.create_table(\n \"users_example\",\n data=pa.table(\n {\n \"id\": [1, 2],\n \"name\": [\"Alice\", \"Bob\"],\n \"login_count\": [10, 20],\n }\n ),\n mode=\"overwrite\",\n)\n\nincoming_users = pa.table(\n {\n \"id\": [2, 3],\n \"name\": [\"Bobby\", \"Charlie\"],\n }\n)\n\n(\n table.merge_insert(\"id\")\n .when_matched_update_all()\n .when_not_matched_insert_all()\n .execute(incoming_users)\n)\n"; diff --git a/docs/tables/create.mdx b/docs/tables/create.mdx index 28ad054..ab56dd8 100644 --- a/docs/tables/create.mdx +++ b/docs/tables/create.mdx @@ -19,6 +19,7 @@ import { RsCreateTableFromArrow as RsCreateTableFromArrow, PyCreateTableFromPydantic as CreateTableFromPydantic, PyCreateTableNestedSchema as CreateTableNestedSchema, + 
PyAddFromDataset as AddFromDataset, PyCreateTableFromIterator as CreateTableFromIterator, TsCreateTableFromIterator as TsCreateTableFromIterator, RsCreateTableFromIterator as RsCreateTableFromIterator, @@ -217,9 +218,39 @@ for a `created_at` field. When you run this code it, should raise the `ValidationError`. -### From Batch Iterators +### Loading Large Datasets -For bulk ingestion on large datasets, prefer batching instead of adding one row at a time. Python and Rust can create a table directly from Arrow batch iterators or readers. In TypeScript, the practical pattern today is to create an empty table and append Arrow batches in chunks. +When ingesting large datasets, use `table.add()` on an existing table rather than +passing all data to `create_table()`. The `add()` method auto-parallelizes large +writes, while `create_table(name, data)` does not. + + +For best performance with large datasets, create an empty table first and then call +`table.add()`. This enables automatic write parallelism for materialized data sources. + + +#### From files (Parquet, CSV, etc.) +Python Only + +For file-based data, pass a `pyarrow.dataset.Dataset` to `table.add()`. This streams +data from disk without loading the entire dataset into memory. + + + + {AddFromDataset} + + + + +`pa.dataset()` input is currently Python-only. TypeScript and Rust support for +file-based dataset ingestion is tracked in +[lancedb#3173](https://github.com/lancedb/lancedb/issues/3173). + + +#### From iterators (custom batch generation) + +When you need custom batch logic — generating embeddings on the fly, transforming +rows from an external source, etc. — use an iterator of `RecordBatch` objects. @@ -243,15 +274,18 @@ Use this pattern when: Python can also consume iterators of other supported types like Pandas DataFrames or Python lists. 
-### Write with Concurrency - -For Python users who want to speed up bulk ingest jobs, it is usually better to write from Arrow-native sources that already produce batches, such as readers, datasets, or scanners, instead of first materializing everything as one large Python list. +#### Write parallelism -This is most useful when you are writing large amounts of data from an existing Arrow pipeline or another batch-oriented source. + +For materialized data (`pa.Table`, `pd.DataFrame`, `pa.dataset()`), LanceDB +automatically parallelizes large writes — no configuration needed. Auto-parallelism +targets approximately 1M rows or 2GB per write partition. -The current codebase also contains a lower-level ingest mechanism for describing a batch source together with extra metadata such as row counts and retry behavior. However, that path is not accepted by the released Python `create_table(...)` and `add(...)` workflow in `lancedb==0.30.0`, so we are not showing it as a docs example yet. - -In Rust, the same lower-level ingest mechanism is available, but the common batch-reader example above is usually the better starting point unless you specifically need to define your own batch source or provide size and retry hints. In TypeScript, this lower-level mechanism is not exposed publicly, so chunked Arrow batch writes remain the recommended pattern. +For streaming sources (iterators, `RecordBatchReader`), LanceDB cannot determine +total size upfront. A `parallelism` parameter to control this manually is planned +but not yet exposed in Python or TypeScript +([tracking issue](https://github.com/lancedb/lancedb/issues/3173)). + ## Create empty table You can create an empty table for scenarios where you want to add data to the table later. 
diff --git a/pyproject.toml b/pyproject.toml index 7e3056c..c9f2739 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ description = "Add your description here" readme = "README.md" requires-python = ">=3.12,<3.14" dependencies = [ - "lancedb>=0.30.0", + "lancedb>=0.30.2", "pyarrow>=23.0.1", "lance-namespace>=0.6.1", "pandas>=3.0.1", diff --git a/tests/py/test_tables.py b/tests/py/test_tables.py index 4f7b600..c0e51ae 100644 --- a/tests/py/test_tables.py +++ b/tests/py/test_tables.py @@ -268,6 +268,42 @@ def tz_must_match(cls, dt: datetime) -> datetime: assert ok is not None +def test_add_from_dataset(tmp_db, tmp_path): + import pyarrow as pa + import pyarrow.dataset as ds + import pyarrow.parquet as pq + + schema = pa.schema( + [ + pa.field("vector", pa.list_(pa.float32(), 4)), + pa.field("item", pa.utf8()), + pa.field("price", pa.float32()), + ] + ) + for i in range(3): + batch = pa.table( + { + "vector": [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]], + "item": [f"item{i * 2}", f"item{i * 2 + 1}"], + "price": [float(i * 2), float(i * 2 + 1)], + }, + schema=schema, + ) + pq.write_table(batch, tmp_path / f"part-{i}.parquet") + + data_path = str(tmp_path) + + # --8<-- [start:add_from_dataset] + import pyarrow.dataset as ds + + dataset = ds.dataset(data_path, format="parquet") + db = tmp_db + table = db.create_table("my_table", schema=dataset.schema, mode="overwrite") + table.add(dataset) + # --8<-- [end:add_from_dataset] + assert table.count_rows() == 6 + + def test_table_creation_from_iterator(tmp_db): # --8<-- [start:create_table_from_iterator] import pyarrow as pa @@ -451,7 +487,7 @@ def make_batches(): yield pa.RecordBatch.from_arrays( [ pa.array([[3.1, 4.1], [5.9, 26.5]], pa.list_(pa.float32(), 2)), - pa.array([f"item{i*2+1}", f"item{i*2+2}"]), + pa.array([f"item{i * 2 + 1}", f"item{i * 2 + 2}"]), pa.array([float((i * 2 + 1) * 10), float((i * 2 + 2) * 10)]), ], ["vector", "item", "price"], @@ -584,11 +620,7 @@ def 
test_merge_matched_update_only(tmp_db): } ) - ( - table.merge_insert("id") - .when_matched_update_all() - .execute(incoming_users) - ) + (table.merge_insert("id").when_matched_update_all().execute(incoming_users)) # --8<-- [end:merge_matched_update_only] rows = table.to_arrow().sort_by("id").to_pylist() assert rows == [ @@ -623,11 +655,7 @@ def test_insert_if_not_exists(tmp_db): } ) - ( - table.merge_insert("id") - .when_not_matched_insert_all() - .execute(incoming_users) - ) + (table.merge_insert("id").when_not_matched_insert_all().execute(incoming_users)) # --8<-- [end:insert_if_not_exists] rows = table.to_arrow().sort_by("id").to_pylist() assert rows == [ @@ -1222,7 +1250,9 @@ def test_consistency_strong(tmp_db): uri = str(tmp_db.uri) writer_db = lancedb.connect(uri) reader_db = lancedb.connect(uri, read_consistency_interval=timedelta(0)) - writer_table = writer_db.create_table("consistency_strong_table", [{"id": 1}], mode="overwrite") + writer_table = writer_db.create_table( + "consistency_strong_table", [{"id": 1}], mode="overwrite" + ) reader_table = reader_db.open_table("consistency_strong_table") writer_table.add([{"id": 2}]) rows_after_write = reader_table.count_rows() @@ -1238,7 +1268,9 @@ def test_consistency_eventual(tmp_db): uri = str(tmp_db.uri) writer_db = lancedb.connect(uri) reader_db = lancedb.connect(uri, read_consistency_interval=timedelta(seconds=3600)) - writer_table = writer_db.create_table("consistency_eventual_table", [{"id": 1}], mode="overwrite") + writer_table = writer_db.create_table( + "consistency_eventual_table", [{"id": 1}], mode="overwrite" + ) reader_table = reader_db.open_table("consistency_eventual_table") writer_table.add([{"id": 2}]) rows_after_write = reader_table.count_rows() @@ -1252,7 +1284,9 @@ def test_consistency_checkout_latest(tmp_db): uri = str(tmp_db.uri) writer_db = lancedb.connect(uri) reader_db = lancedb.connect(uri) - writer_table = writer_db.create_table("consistency_checkout_latest_table", [{"id": 1}], 
mode="overwrite") + writer_table = writer_db.create_table( + "consistency_checkout_latest_table", [{"id": 1}], mode="overwrite" + ) reader_table = reader_db.open_table("consistency_checkout_latest_table") writer_table.add([{"id": 2}]) diff --git a/uv.lock b/uv.lock index 06b401b..6a271e9 100644 --- a/uv.lock +++ b/uv.lock @@ -380,7 +380,7 @@ dependencies = [ requires-dist = [ { name = "geneva", specifier = ">=0.12.0" }, { name = "lance-namespace", specifier = ">=0.6.1" }, - { name = "lancedb", specifier = ">=0.30.0" }, + { name = "lancedb", specifier = ">=0.30.2" }, { name = "pandas", specifier = ">=3.0.1" }, { name = "pillow", specifier = ">=12.1.1" }, { name = "polars", specifier = ">=1.39.2" },