-
Notifications
You must be signed in to change notification settings - Fork 18.6k
milvus: add array data type for collection create #23219
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 37 commits
b1b11de
275ea24
a8e2d08
c7fe30c
3ab4b6c
91635fa
d3012fd
bc33253
c77b03a
a4ce1a9
9f70ca7
b8b94bd
2b5d0cf
cecd707
d6b90f7
8ea4186
fb3ed0f
7719a0d
0e22cff
afd0140
3601c2d
ae234cb
8fa2512
2b9d310
c25cd52
99f8db8
8585008
410d616
f94a687
9ae4191
3195bca
ce58e66
66101e7
d14cd87
4f8f1c4
e31a606
6ec9b86
c171cd5
63387b1
798dcb8
f82668f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -138,6 +138,30 @@ class Milvus(VectorStore): | |
metadata_field (str): Name of the metadata field. Defaults to None. | ||
When metadata_field is specified, | ||
the document's metadata will be stored as JSON. | ||
metadata_schema (Optional[dict]): The data type of each metadata field. | ||
The default is Varchar. Example of a field schema dict: | ||
{ | ||
"column1": { | ||
"dtype": "DataType.ARRAY", | ||
"kwargs": { | ||
"element_type": "DataType.VARCHAR", | ||
"max_capacity": 20, | ||
"max_length": 1000 | ||
} | ||
}, | ||
"column2": { | ||
"dtype": "DataType.ARRAY", | ||
"kwargs": { | ||
"element_type": "DataType.INT64", | ||
"max_capacity": 50 | ||
} | ||
}, | ||
"column3": { | ||
"dtype": "DataType.INT64" | ||
} | ||
} | ||
|
||
|
||
|
||
The connection args used for this class comes in the form of a dict, | ||
here are a few of the options: | ||
|
@@ -208,6 +232,7 @@ def __init__( | |
replica_number: int = 1, | ||
timeout: Optional[float] = None, | ||
num_shards: Optional[int] = None, | ||
metadata_schema: Optional[dict[str, Any]] = None, | ||
): | ||
"""Initialize the Milvus vector store.""" | ||
try: | ||
|
@@ -267,6 +292,7 @@ def __init__( | |
self.replica_number = replica_number | ||
self.timeout = timeout | ||
self.num_shards = num_shards | ||
self.metadata_schema = metadata_schema | ||
|
||
# Create the connection to the server | ||
if connection_args is None: | ||
|
@@ -397,24 +423,43 @@ def _create_collection( | |
# Create FieldSchema for each entry in metadata. | ||
for key, value in metadatas[0].items(): | ||
rgupta2508 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# Infer the corresponding datatype of the metadata | ||
dtype = infer_dtype_bydata(value) | ||
# Datatype isn't compatible | ||
if dtype == DataType.UNKNOWN or dtype == DataType.NONE: | ||
logger.error( | ||
( | ||
"Failure to create collection, " | ||
"unrecognized dtype for key: %s" | ||
), | ||
key, | ||
) | ||
raise ValueError(f"Unrecognized datatype for {key}.") | ||
# Datatype is a string/varchar equivalent | ||
elif dtype == DataType.VARCHAR: | ||
field_type = "dtype" | ||
if ( | ||
key in self.metadata_schema # type: ignore | ||
and field_type in self.metadata_schema[key] # type: ignore | ||
): | ||
kwargs = self.metadata_schema[key]["kwargs"] # type: ignore | ||
fields.append( | ||
FieldSchema(key, DataType.VARCHAR, max_length=65_535) | ||
FieldSchema( | ||
name=key, | ||
dtype=self.metadata_schema[key][field_type], # type: ignore | ||
**kwargs, | ||
) | ||
) | ||
else: | ||
fields.append(FieldSchema(key, dtype)) | ||
dtype = infer_dtype_bydata(value) | ||
# Datatype isn't compatible | ||
if dtype == DataType.UNKNOWN or dtype == DataType.NONE: | ||
logger.error( | ||
( | ||
"Failure to create collection, " | ||
"unrecognized dtype for key: %s" | ||
), | ||
key, | ||
) | ||
raise ValueError(f"Unrecognized datatype for {key}.") | ||
# Datatype is a string/varchar equivalent | ||
elif dtype == DataType.VARCHAR: | ||
fields.append( | ||
FieldSchema(key, DataType.VARCHAR, max_length=65_535) | ||
) | ||
elif dtype == DataType.ARRAY: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This still seems to rely on a fix for the pymilvus problem. It seems that pymilvus needs to support returning DataType.ARRAY and other information before this line of code can take effect: milvus-io/pymilvus#2144 There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In addition, could you please add a corresponding unit test to guarantee the quality of possible future PRs? |
||
kwargs = self.metadata_schema[key]["kwargs"] # type: ignore | ||
fields.append( | ||
FieldSchema(name=key, dtype=DataType.ARRAY, **kwargs) | ||
) | ||
else: | ||
fields.append(FieldSchema(key, dtype)) | ||
|
||
# Create the text field | ||
fields.append( | ||
|
Uh oh!
There was an error while loading. Please reload this page.