Skip to content

Commit 803d2f4

Browse files
committed
partners[milvus]: refine milvus array dtype
Signed-off-by: ChengZi <[email protected]>
1 parent c171cd5 commit 803d2f4

File tree

2 files changed

+53
-5
lines changed

2 files changed

+53
-5
lines changed

libs/partners/milvus/langchain_milvus/vectorstores/milvus.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -472,16 +472,15 @@ def _create_collection(
472472
)
473473
raise ValueError(f"Metadata key {key} is reserved.")
474474
# Infer the corresponding datatype of the metadata
475-
field_type = "dtype"
476475
if (
477476
key in self.metadata_schema # type: ignore
478-
and field_type in self.metadata_schema[key] # type: ignore
477+
and "dtype" in self.metadata_schema[key] # type: ignore
479478
):
480-
kwargs = self.metadata_schema[key]["kwargs"] # type: ignore
479+
kwargs = self.metadata_schema[key].get("kwargs", {}) # type: ignore
481480
fields.append(
482481
FieldSchema(
483482
name=key,
484-
dtype=self.metadata_schema[key][field_type], # type: ignore
483+
dtype=self.metadata_schema[key]["dtype"], # type: ignore
485484
**kwargs,
486485
)
487486
)
@@ -497,11 +496,16 @@ def _create_collection(
497496
key,
498497
)
499498
raise ValueError(f"Unrecognized datatype for {key}.")
500-
# Dataype is a string/varchar equivalent
499+
# Datatype is a string/varchar equivalent
501500
elif dtype == DataType.VARCHAR:
502501
fields.append(
503502
FieldSchema(key, DataType.VARCHAR, max_length=65_535)
504503
)
504+
# infer_dtype_bydata currently can't recognize array type,
505+
# so this branch cannot currently be reached.
506+
# This branch may need to be modified in the future when
507+
# infer_dtype_bydata can recognize array type.
508+
# https://github.com/milvus-io/pymilvus/issues/2165
505509
elif dtype == DataType.ARRAY:
506510
kwargs = self.metadata_schema[key]["kwargs"] # type: ignore
507511
fields.append(

libs/partners/milvus/tests/integration_tests/vectorstores/test_milvus.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ def _milvus_from_texts(
3939
# connection_args={"uri": "http://127.0.0.1:19530"},
4040
connection_args={"uri": "./milvus_demo.db"},
4141
drop_old=drop,
42+
consistency_level="Strong",
4243
**kwargs,
4344
)
4445

@@ -302,6 +303,48 @@ def test_milvus_enable_dynamic_field_with_partition_key() -> None:
302303
docsearch._partition_key_field,
303304
}
304305

306+
def test_milvus_array_field() -> None:
307+
"""Manually specify metadata schema, including an array_field.
308+
For more information about array data type and filtering, please refer to
309+
https://milvus.io/docs/array_data_type.md
310+
"""
311+
from pymilvus import DataType
312+
texts = ["foo", "bar", "baz"]
313+
metadatas = [{"id": i, "array_field": [i, i+1, i+2]} for i in range(len(texts))]
314+
315+
# Manually specify metadata schema, including an array_field.
316+
# If some fields are not specified, Milvus will automatically infer their schemas.
317+
docsearch = _milvus_from_texts(
318+
metadatas=metadatas,
319+
metadata_schema={
320+
"array_field": {
321+
"dtype": DataType.ARRAY,
322+
"kwargs": {
323+
"element_type": DataType.INT64,
324+
"max_capacity": 50
325+
}
326+
},
327+
# "id": {
328+
# "dtype": DataType.INT64,
329+
# }
330+
}
331+
)
332+
output = docsearch.similarity_search("foo", k=10, expr="array_field[0] < 2")
333+
assert len(output) == 2
334+
output = docsearch.similarity_search("foo", k=10, expr="ARRAY_CONTAINS(array_field, 3)")
335+
assert len(output) == 2
336+
337+
# If we use enable_dynamic_field,
338+
# there is no need to manually specify metadata schema.
339+
docsearch = _milvus_from_texts(
340+
enable_dynamic_field=True,
341+
metadatas=metadatas,
342+
)
343+
output = docsearch.similarity_search("foo", k=10, expr="array_field[0] < 2")
344+
assert len(output) == 2
345+
output = docsearch.similarity_search("foo", k=10, expr="ARRAY_CONTAINS(array_field, 3)")
346+
assert len(output) == 2
347+
305348

306349
# if __name__ == "__main__":
307350
# test_milvus()
@@ -319,3 +362,4 @@ def test_milvus_enable_dynamic_field_with_partition_key() -> None:
319362
# test_milvus_enable_dynamic_field()
320363
# test_milvus_disable_dynamic_field()
321364
# test_milvus_metadata_field()
365+
# test_milvus_array_field()

0 commit comments

Comments
 (0)