Skip to content

Commit 63387b1

Browse files
committed
partners[milvus]: refine milvus array dtype
Signed-off-by: ChengZi <[email protected]>
1 parent c171cd5 commit 63387b1

File tree

2 files changed

+56
-5
lines changed

2 files changed

+56
-5
lines changed

libs/partners/milvus/langchain_milvus/vectorstores/milvus.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -472,16 +472,15 @@ def _create_collection(
472472
)
473473
raise ValueError(f"Metadata key {key} is reserved.")
474474
# Infer the corresponding datatype of the metadata
475-
field_type = "dtype"
476475
if (
477476
key in self.metadata_schema # type: ignore
478-
and field_type in self.metadata_schema[key] # type: ignore
477+
and "dtype" in self.metadata_schema[key] # type: ignore
479478
):
480-
kwargs = self.metadata_schema[key]["kwargs"] # type: ignore
479+
kwargs = self.metadata_schema[key].get("kwargs", {}) # type: ignore
481480
fields.append(
482481
FieldSchema(
483482
name=key,
484-
dtype=self.metadata_schema[key][field_type], # type: ignore
483+
dtype=self.metadata_schema[key]["dtype"], # type: ignore
485484
**kwargs,
486485
)
487486
)
@@ -497,11 +496,16 @@ def _create_collection(
497496
key,
498497
)
499498
raise ValueError(f"Unrecognized datatype for {key}.")
500-
# Dataype is a string/varchar equivalent
499+
# Datatype is a string/varchar equivalent
501500
elif dtype == DataType.VARCHAR:
502501
fields.append(
503502
FieldSchema(key, DataType.VARCHAR, max_length=65_535)
504503
)
504+
# infer_dtype_bydata currently cannot recognize the array type,
505+
# so this branch is unreachable for now.
506+
# It may need to be revisited in the future once
507+
# infer_dtype_bydata can recognize the array type.
508+
# https://github.com/milvus-io/pymilvus/issues/2165
505509
elif dtype == DataType.ARRAY:
506510
kwargs = self.metadata_schema[key]["kwargs"] # type: ignore
507511
fields.append(

libs/partners/milvus/tests/integration_tests/vectorstores/test_milvus.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ def _milvus_from_texts(
3939
# connection_args={"uri": "http://127.0.0.1:19530"},
4040
connection_args={"uri": "./milvus_demo.db"},
4141
drop_old=drop,
42+
consistency_level="Strong",
4243
**kwargs,
4344
)
4445

@@ -303,6 +304,51 @@ def test_milvus_enable_dynamic_field_with_partition_key() -> None:
303304
}
304305

305306

307+
def test_milvus_array_field() -> None:
308+
"""Manually specify metadata schema, including an array_field.
309+
For more information about array data type and filtering, please refer to
310+
https://milvus.io/docs/array_data_type.md
311+
"""
312+
from pymilvus import DataType
313+
314+
texts = ["foo", "bar", "baz"]
315+
metadatas = [{"id": i, "array_field": [i, i + 1, i + 2]} for i in range(len(texts))]
316+
317+
# Manually specify metadata schema, including an array_field.
318+
# If some fields are not specified, Milvus will automatically infer their schemas.
319+
docsearch = _milvus_from_texts(
320+
metadatas=metadatas,
321+
metadata_schema={
322+
"array_field": {
323+
"dtype": DataType.ARRAY,
324+
"kwargs": {"element_type": DataType.INT64, "max_capacity": 50},
325+
},
326+
# "id": {
327+
# "dtype": DataType.INT64,
328+
# }
329+
},
330+
)
331+
output = docsearch.similarity_search("foo", k=10, expr="array_field[0] < 2")
332+
assert len(output) == 2
333+
output = docsearch.similarity_search(
334+
"foo", k=10, expr="ARRAY_CONTAINS(array_field, 3)"
335+
)
336+
assert len(output) == 2
337+
338+
# If we use enable_dynamic_field,
339+
# there is no need to manually specify the metadata schema.
340+
docsearch = _milvus_from_texts(
341+
enable_dynamic_field=True,
342+
metadatas=metadatas,
343+
)
344+
output = docsearch.similarity_search("foo", k=10, expr="array_field[0] < 2")
345+
assert len(output) == 2
346+
output = docsearch.similarity_search(
347+
"foo", k=10, expr="ARRAY_CONTAINS(array_field, 3)"
348+
)
349+
assert len(output) == 2
350+
351+
306352
# if __name__ == "__main__":
307353
# test_milvus()
308354
# test_milvus_vector_search()
@@ -319,3 +365,4 @@ def test_milvus_enable_dynamic_field_with_partition_key() -> None:
319365
# test_milvus_enable_dynamic_field()
320366
# test_milvus_disable_dynamic_field()
321367
# test_milvus_metadata_field()
368+
# test_milvus_array_field()

0 commit comments

Comments
 (0)