Sagar Vadodaria 2 weeks ago committed by GitHub
commit f3c1b8157f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -307,6 +307,39 @@
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a8360f3f",
"metadata": {},
"outputs": [],
"source": [
"url = \"<---qdrant url here --->\"\n",
"qdrant = Qdrant.from_texts(\n",
" texts=[\"abc\", \"def\"],\n",
" embedding=embeddings,\n",
" metadatas=[{'page_number': 1}, {'page_number': 2}],\n",
" ids=[1,2], #integer ids\n",
" url=url,\n",
" prefer_grpc=True,\n",
" collection_name=\"my_documents\",\n",
" force_recreate=True,\n",
")\n",
"\n",
"# OR\n",
"\n",
"qdrant = Qdrant.from_texts(\n",
" texts=[\"abc\", \"def\"],\n",
" embedding=embeddings,\n",
" metadatas=[{'page_number': 1}, {'page_number': 2}],\n",
" ids=['fa38d572-4c31-4579-aedc-1960d79df6df','cdc1aa36-d6ab-4fb2-8a94-56674fd27484'], #string based UUID ids\n",
" url=url,\n",
" prefer_grpc=True,\n",
" collection_name=\"my_documents\",\n",
" force_recreate=True,\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
@ -537,6 +570,37 @@
" print(f\"{i + 1}.\", doc.page_content, \"\\n\")"
]
},
{
"cell_type": "markdown",
"id": "91b3f6a5",
"metadata": {},
"source": [
"## Chunk Window Retrieval with Similarity search\n",
"\n",
"Based on the notion of sentence window retrieval. The idea is to get the context that surrounds the result of a similarity search, i.e. fetch additional documents (both above and below) for each of the results returned by the similarity search. This way, we provide more supporting information for each document and pass that on to the LLM for better decision making. This also helps alleviate some issues around chunking and disproportionate splitting of information that should ideally be looked at together and in a more holistic way."
]
},
{
"cell_type": "markdown",
"id": "8128c09f",
"metadata": {},
"source": [
"### Caveat\n",
"\n",
"To use chunk window retrieval, the prerequisite is that you must set integer ids using `Qdrant.from_texts`. Please refer to the 'Recreating the collection' section above."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4fec30f5",
"metadata": {},
"outputs": [],
"source": [
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"found_docs = qdrant.chunk_window_retrieval_similarity_search(query, k=3, window_size=2)"
]
},
{
"attachments": {},
"cell_type": "markdown",

@ -168,7 +168,7 @@ class Qdrant(VectorStore):
self,
texts: Iterable[str],
metadatas: Optional[List[dict]] = None,
ids: Optional[Sequence[str]] = None,
ids: Optional[Union[Sequence[str], Sequence[int]]] = None,
batch_size: int = 64,
**kwargs: Any,
) -> List[str]:
@ -203,7 +203,7 @@ class Qdrant(VectorStore):
self,
texts: Iterable[str],
metadatas: Optional[List[dict]] = None,
ids: Optional[Sequence[str]] = None,
ids: Optional[Union[Sequence[str], Sequence[int]]] = None,
batch_size: int = 64,
**kwargs: Any,
) -> List[str]:
@ -1147,6 +1147,388 @@ class Qdrant(VectorStore):
for i in mmr_selected
]
def chunk_window_retrieval_similarity_search(
    self,
    query: str,
    k: int = 4,
    window_size: int = 2,
    filter: Optional[MetadataFilter] = None,
    search_params: Optional[common_types.SearchParams] = None,
    offset: int = 0,
    score_threshold: Optional[float] = None,
    consistency: Optional[common_types.ReadConsistency] = None,
    **kwargs: Any,
) -> List[Document]:
    """Return docs most similar to query, together with their surrounding chunks.

    The query text is embedded and the work is delegated to
    ``chunk_window_retrieval_similarity_search_by_vector``. That method first
    runs a regular similarity search and then fetches the documents stored
    immediately before and after each hit, as determined by ``window_size``.

    For this to work as expected, document ids must be integers that were
    assigned in sequence when the texts were added (e.g. via
    ``Qdrant.add_texts`` or ``Qdrant.from_texts``).

    Args:
        query: Text to look up documents similar to.
        k: Number of documents fetched by the initial similarity search.
            Defaults to 4.
        window_size:
            Number of additional documents to fetch on each side (both before
            and after) of every retrieved similar document. Defaults to 2.
        filter: Filter by metadata. Defaults to None.
        search_params: Additional search params.
        offset:
            Offset of the first result to return.
            May be used to paginate results.
            Note: large offset values may cause performance issues.
        score_threshold:
            Define a minimal score threshold for the result.
            If defined, less similar results will not be returned.
            Score of the returned result might be higher or smaller than the
            threshold depending on the Distance function used.
            E.g. for cosine similarity only higher scores will be returned.
        consistency:
            Read consistency of the search. Defines how many replicas should be
            queried before returning the result.
            Values:
            - int - number of replicas to query, values should be present in
                    all queried replicas
            - 'majority' - query all replicas, but return values present in the
                           majority of replicas
            - 'quorum' - query the majority of replicas, return values present
                         in all of them
            - 'all' - query all replicas, and return values present in all
                      replicas
        **kwargs:
            Any other named arguments to pass through to QdrantClient.search()

    Returns:
        List of Documents most similar to the query along with the extra
        context documents which surround the similarity search results.
    """
    search_options = {
        "window_size": window_size,
        "filter": filter,
        "search_params": search_params,
        "offset": offset,
        "score_threshold": score_threshold,
        "consistency": consistency,
    }
    query_embedding = self._embed_query(query)
    return self.chunk_window_retrieval_similarity_search_by_vector(
        query_embedding, k, **search_options, **kwargs
    )
@sync_call_fallback
async def achunk_window_retrieval_similarity_search(
    self,
    query: str,
    k: int = 4,
    window_size: int = 2,
    filter: Optional[MetadataFilter] = None,
    search_params: Optional[common_types.SearchParams] = None,
    offset: int = 0,
    score_threshold: Optional[float] = None,
    consistency: Optional[common_types.ReadConsistency] = None,
    **kwargs: Any,
) -> List[Document]:
    """Asynchronously return docs most similar to query plus surrounding chunks.

    The query text is embedded and the work is delegated to
    ``achunk_window_retrieval_similarity_search_by_vector``. That method first
    runs a regular similarity search and then fetches the documents stored
    immediately before and after each hit, as determined by ``window_size``.

    For this to work as expected, document ids must be integers that were
    assigned in sequence when the texts were added (e.g. via
    ``Qdrant.add_texts`` or ``Qdrant.from_texts``).

    Args:
        query: Text to look up documents similar to.
        k: Number of documents fetched by the initial similarity search.
            Defaults to 4.
        window_size:
            Number of additional documents to fetch on each side (both before
            and after) of every retrieved similar document. Defaults to 2.
        filter: Filter by metadata. Defaults to None.
        search_params: Additional search params.
        offset:
            Offset of the first result to return.
            May be used to paginate results.
            Note: large offset values may cause performance issues.
        score_threshold:
            Define a minimal score threshold for the result.
            If defined, less similar results will not be returned.
            Score of the returned result might be higher or smaller than the
            threshold depending on the Distance function used.
            E.g. for cosine similarity only higher scores will be returned.
        consistency:
            Read consistency of the search. Defines how many replicas should be
            queried before returning the result.
            Values:
            - int - number of replicas to query, values should be present in
                    all queried replicas
            - 'majority' - query all replicas, but return values present in the
                           majority of replicas
            - 'quorum' - query the majority of replicas, return values present
                         in all of them
            - 'all' - query all replicas, and return values present in all
                      replicas
        **kwargs:
            Any other named arguments to pass through to
            AsyncQdrantClient.search()

    Returns:
        List of Documents most similar to the query along with the extra
        context documents which surround the similarity search results.
    """
    embedding = await self._aembed_query(query)
    return await self.achunk_window_retrieval_similarity_search_by_vector(
        embedding,
        k,
        window_size=window_size,
        filter=filter,
        search_params=search_params,
        offset=offset,
        score_threshold=score_threshold,
        consistency=consistency,
        # Unpack the extra options; passing ``kwargs=kwargs`` would forward a
        # single keyword literally named "kwargs" to the client search call.
        **kwargs,
    )
def chunk_window_retrieval_similarity_search_by_vector(
    self,
    embedding: List[float],
    k: int = 4,
    window_size: int = 2,
    filter: Optional[MetadataFilter] = None,
    search_params: Optional[common_types.SearchParams] = None,
    offset: int = 0,
    score_threshold: Optional[float] = None,
    consistency: Optional[common_types.ReadConsistency] = None,
    **kwargs: Any,
) -> List[Document]:
    """Return docs most similar to the embedding plus their neighbouring chunks.

    Retrieval happens in two steps:

    1. A regular similarity search fetches up to ``k`` points.
    2. For every hit, the points whose ids lie within ``window_size`` of the
       hit's id (both before and after) are fetched by id.

    Hits and neighbours are merged and returned sorted by point id; no
    re-ranking is performed. A point is returned only once even when the
    windows of several hits overlap. ``filter`` is applied to the similarity
    search only; neighbours are fetched purely by id.

    For this to work as expected, document ids must be integers that were
    assigned in sequence when the texts were added (e.g. via
    ``Qdrant.add_texts`` or ``Qdrant.from_texts``).

    Args:
        embedding: Embedding vector to look up documents similar to.
        k: Number of documents fetched by the initial similarity search.
            Defaults to 4.
        window_size:
            Number of additional documents to fetch on each side (both before
            and after) of every retrieved similar document. Defaults to 2.
        filter: Filter by metadata. Defaults to None.
        search_params: Additional search params.
        offset:
            Offset of the first result to return.
            May be used to paginate results.
            Note: large offset values may cause performance issues.
        score_threshold:
            Define a minimal score threshold for the result.
            If defined, less similar results will not be returned.
            Score of the returned result might be higher or smaller than the
            threshold depending on the Distance function used.
            E.g. for cosine similarity only higher scores will be returned.
        consistency:
            Read consistency of the search. Defines how many replicas should be
            queried before returning the result.
            Values:
            - int - number of replicas to query, values should be present in
                    all queried replicas
            - 'majority' - query all replicas, but return values present in the
                           majority of replicas
            - 'quorum' - query the majority of replicas, return values present
                         in all of them
            - 'all' - query all replicas, and return values present in all
                      replicas
        **kwargs:
            Any other named arguments to pass through to QdrantClient.search()

    Returns:
        List of Documents most similar to the query along with the extra
        context documents which surround the similarity search results,
        ordered by point id.

    Raises:
        QdrantException: If a point returned by the similarity search has a
            non-integer id.
    """
    query_vector = embedding
    if self.vector_name is not None:
        query_vector = (self.vector_name, embedding)  # type: ignore[assignment]

    results = self.client.search(
        collection_name=self.collection_name,
        query_vector=query_vector,
        query_filter=filter,
        search_params=search_params,
        limit=k,
        offset=offset,
        with_payload=True,
        with_vectors=False,  # Langchain does not expect vectors to be returned
        score_threshold=score_threshold,
        consistency=consistency,
        **kwargs,
    )

    # Window arithmetic only makes sense for integer ids, not UUID strings.
    hit_ids = set()
    for point in results:
        if not isinstance(point.id, int):
            raise QdrantException(
                "Chunk Window Retrieval needs PointId to be integer. "
                f"PointId found was '{point.id}'"
            )
        hit_ids.add(point.id)

    # Collect each neighbour id once, skip ids that are already hits (they
    # would otherwise appear twice in the output) and drop negative ids,
    # which are not valid numeric point ids.
    candidate_ids = {
        hit_id + delta
        for hit_id in hit_ids
        for delta in range(-window_size, window_size + 1)
    }
    window_point_ids = sorted(
        point_id for point_id in candidate_ids - hit_ids if point_id >= 0
    )

    # Second round trip: fetch the neighbours by id, only if there are any.
    window_point_results = []
    if window_point_ids:
        window_point_results = self.client.retrieve(
            collection_name=self.collection_name,
            ids=window_point_ids,
            with_payload=True,
            with_vectors=False,
        )

    # Merge hits and neighbours and restore the original chunk order.
    ordered_points = sorted(
        [*results, *window_point_results], key=lambda point: point.id
    )
    return [
        self._document_from_scored_point(
            point,
            self.collection_name,
            self.content_payload_key,
            self.metadata_payload_key,
        )
        for point in ordered_points
    ]
@sync_call_fallback
async def achunk_window_retrieval_similarity_search_by_vector(
    self,
    embedding: List[float],
    k: int = 4,
    window_size: int = 2,
    filter: Optional[MetadataFilter] = None,
    search_params: Optional[common_types.SearchParams] = None,
    offset: int = 0,
    score_threshold: Optional[float] = None,
    consistency: Optional[common_types.ReadConsistency] = None,
    **kwargs: Any,
) -> List[Document]:
    """Asynchronously return docs most similar to the embedding plus neighbours.

    Retrieval happens in two steps:

    1. A regular similarity search fetches up to ``k`` points.
    2. For every hit, the points whose ids lie within ``window_size`` of the
       hit's id (both before and after) are fetched by id.

    Hits and neighbours are merged and returned sorted by point id; no
    re-ranking is performed. A point is returned only once even when the
    windows of several hits overlap. ``filter`` is applied to the similarity
    search only; neighbours are fetched purely by id.

    For this to work as expected, document ids must be integers that were
    assigned in sequence when the texts were added (e.g. via
    ``Qdrant.add_texts`` or ``Qdrant.from_texts``).

    Args:
        embedding: Embedding vector to look up documents similar to.
        k: Number of documents fetched by the initial similarity search.
            Defaults to 4.
        window_size:
            Number of additional documents to fetch on each side (both before
            and after) of every retrieved similar document. Defaults to 2.
        filter: Filter by metadata. Defaults to None.
        search_params: Additional search params.
        offset:
            Offset of the first result to return.
            May be used to paginate results.
            Note: large offset values may cause performance issues.
        score_threshold:
            Define a minimal score threshold for the result.
            If defined, less similar results will not be returned.
            Score of the returned result might be higher or smaller than the
            threshold depending on the Distance function used.
            E.g. for cosine similarity only higher scores will be returned.
        consistency:
            Read consistency of the search. Defines how many replicas should be
            queried before returning the result.
            Values:
            - int - number of replicas to query, values should be present in
                    all queried replicas
            - 'majority' - query all replicas, but return values present in the
                           majority of replicas
            - 'quorum' - query the majority of replicas, return values present
                         in all of them
            - 'all' - query all replicas, and return values present in all
                      replicas
        **kwargs:
            Any other named arguments to pass through to
            AsyncQdrantClient.search()

    Returns:
        List of Documents most similar to the query along with the extra
        context documents which surround the similarity search results,
        ordered by point id.

    Raises:
        NotImplementedError: If no async client is configured or the async
            client is backed by local storage.
        QdrantException: If a point returned by the similarity search has a
            non-integer id.
    """
    from qdrant_client.local.async_qdrant_local import AsyncQdrantLocal

    if self.async_client is None or isinstance(
        self.async_client._client, AsyncQdrantLocal
    ):
        raise NotImplementedError(
            "QdrantLocal cannot interoperate with sync and async clients"
        )

    query_vector = embedding
    if self.vector_name is not None:
        query_vector = (self.vector_name, embedding)  # type: ignore[assignment]

    results = await self.async_client.search(
        collection_name=self.collection_name,
        query_vector=query_vector,
        query_filter=filter,
        search_params=search_params,
        limit=k,
        offset=offset,
        with_payload=True,
        with_vectors=False,  # Langchain does not expect vectors to be returned
        score_threshold=score_threshold,
        consistency=consistency,
        **kwargs,
    )

    # Window arithmetic only makes sense for integer ids, not UUID strings.
    hit_ids = set()
    for point in results:
        if not isinstance(point.id, int):
            raise QdrantException(
                "Chunk Window Retrieval needs PointId to be integer. "
                f"PointId found was '{point.id}'"
            )
        hit_ids.add(point.id)

    # Collect each neighbour id once, skip ids that are already hits (they
    # would otherwise appear twice in the output) and drop negative ids,
    # which are not valid numeric point ids.
    candidate_ids = {
        hit_id + delta
        for hit_id in hit_ids
        for delta in range(-window_size, window_size + 1)
    }
    window_point_ids = sorted(
        point_id for point_id in candidate_ids - hit_ids if point_id >= 0
    )

    # Second round trip: fetch the neighbours by id, only if there are any.
    window_point_results = []
    if window_point_ids:
        window_point_results = await self.async_client.retrieve(
            collection_name=self.collection_name,
            ids=window_point_ids,
            with_payload=True,
            with_vectors=False,
        )

    # Merge hits and neighbours and restore the original chunk order.
    ordered_points = sorted(
        [*results, *window_point_results], key=lambda point: point.id
    )
    return [
        self._document_from_scored_point(
            point,
            self.collection_name,
            self.content_payload_key,
            self.metadata_payload_key,
        )
        for point in ordered_points
    ]
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
"""Delete by vector ID or other criteria.
@ -1202,7 +1584,7 @@ class Qdrant(VectorStore):
texts: List[str],
embedding: Embeddings,
metadatas: Optional[List[dict]] = None,
ids: Optional[Sequence[str]] = None,
ids: Optional[Union[Sequence[str], Sequence[int]]] = None,
location: Optional[str] = None,
url: Optional[str] = None,
port: Optional[int] = 6333,
@ -1423,7 +1805,7 @@ class Qdrant(VectorStore):
texts: List[str],
embedding: Embeddings,
metadatas: Optional[List[dict]] = None,
ids: Optional[Sequence[str]] = None,
ids: Optional[Union[Sequence[str], Sequence[int]]] = None,
location: Optional[str] = None,
url: Optional[str] = None,
port: Optional[int] = 6333,
@ -2150,7 +2532,7 @@ class Qdrant(VectorStore):
self,
texts: Iterable[str],
metadatas: Optional[List[dict]] = None,
ids: Optional[Sequence[str]] = None,
ids: Optional[Union[Sequence[str], Sequence[int]]] = None,
batch_size: int = 64,
) -> Generator[Tuple[List[str], List[rest.PointStruct]], None, None]:
from qdrant_client.http import models as rest
@ -2192,7 +2574,7 @@ class Qdrant(VectorStore):
self,
texts: Iterable[str],
metadatas: Optional[List[dict]] = None,
ids: Optional[Sequence[str]] = None,
ids: Optional[Union[Sequence[str], Sequence[int]]] = None,
batch_size: int = 64,
) -> AsyncGenerator[Tuple[List[str], List[rest.PointStruct]], None]:
from qdrant_client.http import models as rest

@ -104,6 +104,35 @@ def test_qdrant_add_texts_stores_ids(batch_size: int) -> None:
assert set(ids) == set(stored_ids)
@pytest.mark.parametrize("batch_size", [1, 64])
def test_qdrant_add_texts_stores_int_ids(batch_size: int) -> None:
    """Test end to end Qdrant.add_texts stores provided ids (of type 'int')"""
    from qdrant_client import QdrantClient
    from qdrant_client.http import models as rest

    ids = [1, 2]

    client = QdrantClient(":memory:")
    collection_name = uuid.uuid4().hex
    client.recreate_collection(
        collection_name,
        vectors_config=rest.VectorParams(size=10, distance=rest.Distance.COSINE),
    )

    vec_store = Qdrant(client, collection_name, ConsistentFakeEmbeddings())
    returned_ids = vec_store.add_texts(["abc", "def"], ids=ids, batch_size=batch_size)

    # Compare the whole sequences: a zip()-based pairwise check would pass
    # vacuously if add_texts returned fewer ids than were provided.
    assert list(returned_ids) == ids
    assert 2 == client.count(collection_name).count

    stored_ids = [point.id for point in client.scroll(collection_name)[0]]
    # Integer ids must be stored as integers, not converted to strings.
    assert all(isinstance(stored_id, int) for stored_id in stored_ids)
    assert set(ids) == set(stored_ids)
@pytest.mark.parametrize("vector_name", ["custom-vector"])
def test_qdrant_add_texts_stores_embeddings_as_named_vectors(vector_name: str) -> None:
"""Test end to end Qdrant.add_texts stores named vectors if name is provided."""

@ -0,0 +1,213 @@
from typing import Optional
import pytest
from langchain_core.documents import Document
from langchain_community.vectorstores import Qdrant
from tests.integration_tests.vectorstores.fake_embeddings import (
ConsistentFakeEmbeddings,
)
from tests.integration_tests.vectorstores.qdrant.common import assert_documents_equals
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_chunk_window_search(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
) -> None:
    """Test end to end construction and chunk window search by query text."""
    texts = [
        "Dogs are known for their loyalty and companionship",
        "Dogs have temperaments ranging from energetic and playful to calm and gentle",
        "Dogs are highly intelligent animals",
        "Cars come in various shapes, sizes, and colors",
        "Drivers often customize cars to reflect their personality",
        "Cars enable mobility, facilitating travel and exploration",
    ]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        ids=[1, 2, 3, 4, 5, 6],
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
    )

    # Querying with the second chunk returns it along with one chunk on
    # either side (window_size=1), ordered by id.
    output = docsearch.chunk_window_retrieval_similarity_search(
        texts[1], k=1, window_size=1
    )
    assert_documents_equals(
        actual=output,
        expected=[Document(page_content=text) for text in texts[0:3]],
    )

    # below assert highlights that even though the chunks are not semantically
    # similar, they will still be returned based on window_size
    # we are not reranking the chunks in this approach.
    output = docsearch.chunk_window_retrieval_similarity_search(
        texts[2], k=1, window_size=1
    )
    assert_documents_equals(
        actual=output,
        expected=[Document(page_content=text) for text in texts[1:4]],
    )
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_chunk_window_search_by_vector(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
) -> None:
    """Test end to end construction and chunk window search by embedding."""
    texts = [
        "Dogs are known for their loyalty and companionship",
        "Dogs have temperaments ranging from energetic and playful to calm and gentle",
        "Dogs are highly intelligent animals",
        "Cars come in various shapes, sizes, and colors",
        "Drivers often customize cars to reflect their personality",
        "Cars enable mobility, facilitating travel and exploration",
    ]
    embeddings = ConsistentFakeEmbeddings()
    docsearch = Qdrant.from_texts(
        texts,
        embeddings,
        ids=[1, 2, 3, 4, 5, 6],
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
    )

    # Searching with the embedding of the second chunk returns it along with
    # one chunk on either side (window_size=1), ordered by id.
    query_vector = embeddings.embed_query(texts[1])
    output = docsearch.chunk_window_retrieval_similarity_search_by_vector(
        query_vector, k=1, window_size=1
    )
    assert_documents_equals(
        actual=output,
        expected=[Document(page_content=text) for text in texts[0:3]],
    )

    # below assert highlights that even though the chunks are not semantically
    # similar, they will still be returned based on window_size
    # we are not reranking the chunks in this approach.
    query_vector = embeddings.embed_query(texts[2])
    output = docsearch.chunk_window_retrieval_similarity_search_by_vector(
        query_vector, k=1, window_size=1
    )
    assert_documents_equals(
        actual=output,
        expected=[Document(page_content=text) for text in texts[1:4]],
    )
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_chunk_window_search_filters_with_qdrant_filters(
    vector_name: Optional[str],
) -> None:
    """Test chunk window search combined with a native Qdrant filter."""
    from qdrant_client.http import models as rest

    def page_metadata(index: int) -> dict:
        # Metadata stored for the text at position ``index``.
        return {
            "page": index,
            "details": {"page": index + 1, "pages": [index + 2, -1]},
        }

    texts = [
        "Dogs are known for their loyalty and companionship",
        "Dogs have temperaments ranging from energetic and playful to calm and gentle",
        "Dogs are highly intelligent animals",
        "Cars come in various shapes, sizes, and colors",
        "Drivers often customize cars to reflect their personality",
        "Cars enable mobility, facilitating travel and exploration",
    ]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=[page_metadata(index) for index in range(len(texts))],
        ids=[1, 2, 3, 4, 5, 6],
        location=":memory:",
        vector_name=vector_name,
    )

    # All three conditions select the text at position 2 only.
    conditions = [
        rest.FieldCondition(
            key="metadata.page",
            match=rest.MatchValue(value=2),
        ),
        rest.FieldCondition(
            key="metadata.details.page",
            match=rest.MatchValue(value=3),
        ),
        rest.FieldCondition(
            key="metadata.details.pages",
            match=rest.MatchAny(any=[4]),
        ),
    ]
    output = docsearch.chunk_window_retrieval_similarity_search(
        texts[1],
        k=1,
        window_size=1,
        filter=rest.Filter(must=conditions),
    )

    # The filter forces the hit to be position 2; its neighbours on either
    # side (positions 1 and 3) are expected although they do not match it.
    assert_documents_equals(
        actual=output,
        expected=[
            Document(page_content=texts[index], metadata=page_metadata(index))
            for index in (1, 2, 3)
        ],
    )

@ -66,6 +66,39 @@ def test_qdrant_from_texts_stores_ids(
assert set(ids) == set(stored_ids)
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_from_texts_stores_int_ids(
    batch_size: int, vector_name: Optional[str]
) -> None:
    """Test end to end Qdrant.from_texts stores provided ids (of type 'int')"""
    from qdrant_client import QdrantClient

    expected_ids = [1, 2]
    collection_name = uuid.uuid4().hex
    with tempfile.TemporaryDirectory() as storage_dir:
        vec_store = Qdrant.from_texts(
            ["abc", "def"],
            ConsistentFakeEmbeddings(),
            ids=expected_ids,
            collection_name=collection_name,
            path=str(storage_dir),
            batch_size=batch_size,
            vector_name=vector_name,
        )
        # Drop the store before a second client opens the same storage path.
        del vec_store

        client = QdrantClient(path=str(storage_dir))
        assert 2 == client.count(collection_name).count

        records = client.scroll(collection_name)[0]
        stored_ids = [record.id for record in records]
        assert all(isinstance(stored_id, int) for stored_id in stored_ids)
        assert set(expected_ids) == set(stored_ids)
@pytest.mark.parametrize("vector_name", ["custom-vector"])
def test_qdrant_from_texts_stores_embeddings_as_named_vectors(vector_name: str) -> None:
"""Test end to end Qdrant.from_texts stores named vectors if name is provided."""

Loading…
Cancel
Save