mirror of https://github.com/hwchase17/langchain
qdrant: init package (#21146)
## Description This PR introduces the new `langchain-qdrant` partner package, intending to deprecate the community package. ## Changes - Moved the Qdrant vector store implementation `/libs/partners/qdrant` with integration tests. - The conditional imports of the client library are now regular with minor implementation improvements. - Added a deprecation warning to `langchain_community.vectorstores.qdrant.Qdrant`. - Replaced references/imports from `langchain_community` with either `langchain_core` or by moving the definitions to the `langchain_qdrant` package itself. - Updated the Qdrant vector store documentation to reflect the changes. ## Testing - `QDRANT_URL` and [`QDRANT_API_KEY`](pull/21649/head langchain-qdrant==0.0.1583e36bf6b
) env values need to be set to [run integration tests](d608c93d1f
) in the [cloud](https://cloud.qdrant.tech). - If a Qdrant instance is running at `http://localhost:6333`, the integration tests will use it too. - By default, tests use an [`in-memory`](https://github.com/qdrant/qdrant-client?tab=readme-ov-file#local-mode) instance(Not comprehensive). --------- Co-authored-by: Erick Friis <erick@langchain.dev> Co-authored-by: Erick Friis <erickfriis@gmail.com>
parent
fe8c9d621a
commit
edd68e4ad4
@ -0,0 +1 @@
|
||||
__pycache__
|
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2024 LangChain, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
@ -0,0 +1,60 @@
|
||||
.PHONY: all format lint test tests integration_test integration_tests help
|
||||
|
||||
# Default target executed when no arguments are given to make.
|
||||
all: help
|
||||
|
||||
# Define a variable for the test file path.
|
||||
TEST_FILE ?= tests/unit_tests/
|
||||
|
||||
integration_test integration_tests: TEST_FILE = tests/integration_tests/
|
||||
|
||||
test tests integration_test integration_tests:
|
||||
poetry run pytest $(TEST_FILE)
|
||||
|
||||
|
||||
######################
|
||||
# LINTING AND FORMATTING
|
||||
######################
|
||||
|
||||
# Define a variable for Python and notebook files.
|
||||
PYTHON_FILES=.
|
||||
MYPY_CACHE=.mypy_cache
|
||||
lint format: PYTHON_FILES=.
|
||||
lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/partners/qdrant --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
|
||||
lint_package: PYTHON_FILES=langchain_qdrant
|
||||
lint_tests: PYTHON_FILES=tests
|
||||
lint_tests: MYPY_CACHE=.mypy_cache_test
|
||||
|
||||
# Run the linters over $(PYTHON_FILES).
# Every step is skipped when the file list is empty (e.g. lint_diff with no
# changed Python files), because the tools would otherwise fall back to the
# whole directory or fail for lack of targets.
# `mkdir -p` keeps repeated runs quiet once the mypy cache directory exists.
lint lint_diff lint_package lint_tests:
	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff $(PYTHON_FILES)
	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff --select I $(PYTHON_FILES)
	[ "$(PYTHON_FILES)" = "" ] || { mkdir -p $(MYPY_CACHE); poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE); }
|
||||
|
||||
# Reformat $(PYTHON_FILES) and fix import ordering.
# Both steps are skipped when the file list is empty (format_diff with no
# changed files) so that ruff does not fall back to the whole directory.
format format_diff:
	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES)
	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff --select I --fix $(PYTHON_FILES)
|
||||
|
||||
spell_check:
|
||||
poetry run codespell --toml pyproject.toml
|
||||
|
||||
spell_fix:
|
||||
poetry run codespell --toml pyproject.toml -w
|
||||
|
||||
check_imports: $(shell find langchain_qdrant -name '*.py')
|
||||
poetry run python ./scripts/check_imports.py $^
|
||||
|
||||
######################
|
||||
# HELP
|
||||
######################
|
||||
|
||||
help:
|
||||
@echo '----'
|
||||
@echo 'check_imports - check imports'
|
||||
@echo 'format - run code formatters'
|
||||
@echo 'lint - run linters'
|
||||
@echo 'test - run unit tests'
|
||||
@echo 'tests - run unit tests'
|
||||
@echo 'test TEST_FILE=<test_file> - run all tests in file'
|
||||
@echo 'integration_test - run integration tests'
|
||||
@echo 'integration_tests - run integration tests'
|
@ -0,0 +1,25 @@
|
||||
# langchain-qdrant
|
||||
|
||||
This package contains the LangChain integration with [Qdrant](https://qdrant.tech/).
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
pip install -U langchain-qdrant
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
The `Qdrant` class exposes the connection to the Qdrant vector store.
|
||||
|
||||
```python
|
||||
from langchain_qdrant import Qdrant
|
||||
|
||||
embeddings = ... # use a LangChain Embeddings class
|
||||
|
||||
vectorstore = Qdrant.from_existing_collection(
|
||||
embeddings=embeddings,
|
||||
collection_name="<COLLECTION_NAME>",
|
||||
url="http://localhost:6333",
|
||||
)
|
||||
```
|
@ -0,0 +1,3 @@
|
||||
from langchain_qdrant.vectorstores import Qdrant
|
||||
|
||||
__all__ = ["Qdrant"]
|
@ -0,0 +1,70 @@
|
||||
from typing import List, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
Matrix = Union[List[List[float]], List[np.ndarray], np.ndarray]
|
||||
|
||||
|
||||
def maximal_marginal_relevance(
    query_embedding: np.ndarray,
    embedding_list: list,
    lambda_mult: float = 0.5,
    k: int = 4,
) -> List[int]:
    """Select embedding indices by maximal marginal relevance (MMR).

    The first index is the embedding most cosine-similar to the query. Each
    later index maximises
    ``lambda_mult * sim_to_query - (1 - lambda_mult) * max_sim_to_selected``.

    Args:
        query_embedding: Query vector; a 1-D array is expanded to one row.
        embedding_list: Candidate embeddings to choose from.
        lambda_mult: Weight of query relevance versus redundancy.
        k: Maximum number of indices to return.

    Returns:
        Indices into ``embedding_list`` in selection order, at most
        ``min(k, len(embedding_list))`` of them; empty when that is <= 0.
    """
    target_count = min(k, len(embedding_list))
    if target_count <= 0:
        return []

    if query_embedding.ndim == 1:
        query_embedding = np.expand_dims(query_embedding, axis=0)
    query_scores = cosine_similarity(query_embedding, embedding_list)[0]

    # Seed the selection with the single most relevant embedding.
    seed = int(np.argmax(query_scores))
    picked = [seed]
    picked_vectors = np.array([embedding_list[seed]])

    while len(picked) < target_count:
        # Similarity of every candidate to everything selected so far.
        overlap = cosine_similarity(embedding_list, picked_vectors)
        winner, winner_score = -1, -np.inf
        for candidate, relevance in enumerate(query_scores):
            if candidate in picked:
                continue
            redundancy = max(overlap[candidate])
            mmr_score = lambda_mult * relevance - (1 - lambda_mult) * redundancy
            if mmr_score > winner_score:
                winner, winner_score = candidate, mmr_score
        picked.append(winner)
        picked_vectors = np.append(picked_vectors, [embedding_list[winner]], axis=0)
    return picked
|
||||
|
||||
|
||||
def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
    """Row-wise cosine similarity between two equal-width matrices.

    Args:
        X: Matrix whose rows are vectors.
        Y: Matrix whose rows are vectors, with as many columns as ``X``.

    Returns:
        An array whose ``[i, j]`` entry is the cosine similarity between row
        ``i`` of ``X`` and row ``j`` of ``Y``. An empty array is returned when
        either input is empty. In the NumPy fallback, entries that would be
        NaN or infinite (e.g. zero-norm rows) are set to ``0.0``.

    Raises:
        ValueError: If ``X`` and ``Y`` have a different number of columns.
    """
    if len(X) == 0 or len(Y) == 0:
        return np.array([])

    X = np.array(X)
    Y = np.array(Y)
    if X.shape[1] != Y.shape[1]:
        raise ValueError(
            f"Number of columns in X and Y must be the same. X has shape {X.shape} "
            f"and Y has shape {Y.shape}."
        )
    try:
        # Optional accelerated path; falls back to NumPy when not installed.
        import simsimd as simd  # type: ignore

        X = np.array(X, dtype=np.float32)
        Y = np.array(Y, dtype=np.float32)
        # Convert the result to an ndarray before doing arithmetic on it:
        # simsimd may hand back its own buffer-like result type (or a bare
        # float) rather than an ndarray.
        Z = 1 - np.array(simd.cdist(X, Y, metric="cosine"))
        # A scalar result is still returned as a one-element array.
        return np.atleast_1d(Z)
    except ImportError:
        X_norm = np.linalg.norm(X, axis=1)
        Y_norm = np.linalg.norm(Y, axis=1)
        # Ignore divide by zero errors run time warnings as those are handled below.
        with np.errstate(divide="ignore", invalid="ignore"):
            similarity = np.dot(X, Y.T) / np.outer(X_norm, Y_norm)
        similarity[np.isnan(similarity) | np.isinf(similarity)] = 0.0
        return similarity
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,95 @@
|
||||
[tool.poetry]
|
||||
name = "langchain-qdrant"
|
||||
version = "0.0.1"
|
||||
description = "An integration package connecting Qdrant and LangChain"
|
||||
authors = []
|
||||
readme = "README.md"
|
||||
repository = "https://github.com/langchain-ai/langchain"
|
||||
license = "MIT"
|
||||
|
||||
[tool.poetry.urls]
|
||||
"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/qdrant"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.8.1,<4.0"
|
||||
langchain-core = ">=0.1.52,<0.3"
|
||||
qdrant-client = "^1.9.0"
|
||||
|
||||
[tool.poetry.group.test]
|
||||
optional = true
|
||||
|
||||
[tool.poetry.group.test.dependencies]
|
||||
pytest = "^7.3.0"
|
||||
freezegun = "^1.2.2"
|
||||
pytest-mock = "^3.10.0"
|
||||
syrupy = "^4.0.2"
|
||||
pytest-watcher = "^0.3.4"
|
||||
pytest-asyncio = "^0.21.1"
|
||||
langchain-core = {path = "../../core", develop = true}
|
||||
requests = "^2.31.0"
|
||||
|
||||
[tool.poetry.group.codespell]
|
||||
optional = true
|
||||
|
||||
[tool.poetry.group.codespell.dependencies]
|
||||
codespell = "^2.2.0"
|
||||
|
||||
[tool.poetry.group.test_integration]
|
||||
optional = true
|
||||
|
||||
[tool.poetry.group.test_integration.dependencies]
|
||||
|
||||
[tool.poetry.group.lint]
|
||||
optional = true
|
||||
|
||||
[tool.poetry.group.lint.dependencies]
|
||||
ruff = "^0.1.5"
|
||||
|
||||
[tool.poetry.group.typing.dependencies]
|
||||
mypy = "^0.991"
|
||||
langchain-core = {path = "../../core", develop = true}
|
||||
|
||||
[tool.poetry.group.dev]
|
||||
optional = true
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
langchain-core = {path = "../../core", develop = true}
|
||||
|
||||
[tool.ruff]
|
||||
select = [
|
||||
"E", # pycodestyle
|
||||
"F", # pyflakes
|
||||
"I", # isort
|
||||
]
|
||||
|
||||
[tool.mypy]
|
||||
disallow_untyped_defs = true
|
||||
|
||||
[tool.coverage.run]
|
||||
omit = [
|
||||
"tests/*",
|
||||
]
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core>=1.0.0"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
# --strict-markers will raise errors on unknown marks.
|
||||
# https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks
|
||||
#
|
||||
# https://docs.pytest.org/en/7.1.x/reference/reference.html
|
||||
# --strict-config any warnings encountered while parsing the `pytest`
|
||||
# section of the configuration file raise errors.
|
||||
#
|
||||
# https://github.com/tophat/syrupy
|
||||
# --snapshot-warn-unused Prints a warning on unused snapshots rather than fail the test suite.
|
||||
addopts = "--snapshot-warn-unused --strict-markers --strict-config --durations=5"
|
||||
# Registering custom markers.
|
||||
# https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers
|
||||
markers = [
|
||||
"requires: mark tests as requiring a specific library",
|
||||
"asyncio: mark tests as requiring asyncio",
|
||||
"compile: mark placeholder test used to compile integration tests without running them",
|
||||
]
|
||||
asyncio_mode = "auto"
|
@ -0,0 +1,17 @@
|
||||
import sys
|
||||
import traceback
|
||||
from importlib.machinery import SourceFileLoader
|
||||
|
||||
def has_import_failures(files: list) -> bool:
    """Load each file as a module and report whether any of them failed.

    Every file that raises while being loaded has its path and traceback
    printed; loading continues with the remaining files.

    Args:
        files: Paths of Python source files to load.

    Returns:
        True if at least one file raised during loading, otherwise False.
    """
    failed = False
    for file in files:
        try:
            SourceFileLoader("x", file).load_module()
        except Exception:
            # Record the failure but keep going so every broken file is listed.
            failed = True
            print(file)
            traceback.print_exc()
            print()
    return failed


if __name__ == "__main__":
    # Exit non-zero when any file could not be imported.
    sys.exit(1 if has_import_failures(sys.argv[1:]) else 0)
|
@ -0,0 +1,27 @@
|
||||
#!/bin/bash
#
# Scan the tracked files of a Git repository for direct pydantic imports,
# i.e. lines starting with "import pydantic" or "from pydantic".
#
# Usage: ./scripts/check_pydantic.sh /path/to/repository

# Exactly one argument (the repository path) is required.
if [ "$#" -ne 1 ]; then
  echo "Usage: $0 /path/to/repository"
  exit 1
fi

repo_dir="$1"

# Collect every tracked line that matches one of the import patterns.
matches=$(git -C "$repo_dir" grep -E '^import pydantic|^from pydantic')

# No offending lines: nothing to report.
if [ -z "$matches" ]; then
  exit 0
fi

# Report the offending lines and explain the expected replacement.
echo "ERROR: The following lines need to be updated:"
echo "$matches"
echo "Please replace the code with an import from langchain_core.pydantic_v1."
echo "For example, replace 'from pydantic import BaseModel'"
echo "with 'from langchain_core.pydantic_v1 import BaseModel'"
exit 1
|
@ -0,0 +1,17 @@
|
||||
#!/bin/bash

set -eu

# Number of forbidden import patterns found in tracked files.
errors=0

# The package must not import from langchain or langchain_experimental.
# git grep prints any matching lines and succeeds only when it finds some.
for pattern in '^from langchain\.' '^from langchain_experimental\.'; do
  if git --no-pager grep "$pattern" .; then
    errors=$((errors+1))
  fi
done

# Fail when at least one forbidden pattern was found.
if [ "$errors" -gt 0 ]; then
  exit 1
fi
exit 0
|
@ -0,0 +1,123 @@
|
||||
import os
|
||||
import uuid
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain_qdrant import Qdrant
|
||||
from tests.integration_tests.common import ConsistentFakeEmbeddings
|
||||
from tests.integration_tests.fixtures import qdrant_locations
|
||||
|
||||
API_KEY = os.getenv("QDRANT_API_KEY")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_aadd_texts_returns_all_ids(
    batch_size: int, qdrant_location: str
) -> None:
    """Check that Qdrant.aadd_texts returns one distinct id per added text."""
    store: Qdrant = Qdrant.from_texts(
        ["foobar"],
        ConsistentFakeEmbeddings(),
        batch_size=batch_size,
        location=qdrant_location,
    )

    returned = await store.aadd_texts(["foo", "bar", "baz"])

    # Three texts in, three ids out, none of them repeated.
    assert len(returned) == 3
    assert len(set(returned)) == 3
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_aadd_texts_stores_duplicated_texts(
    vector_name: Optional[str], qdrant_location: str
) -> None:
    """Check that Qdrant.aadd_texts keeps identical texts as separate points."""
    from qdrant_client import QdrantClient
    from qdrant_client.http import models as rest

    client = QdrantClient(location=qdrant_location, api_key=API_KEY)
    collection_name = uuid.uuid4().hex

    # Use a named-vector layout only when a vector name was requested.
    vectors_config = rest.VectorParams(size=10, distance=rest.Distance.COSINE)
    if vector_name is not None:
        vectors_config = {vector_name: vectors_config}  # type: ignore[assignment]
    client.recreate_collection(collection_name, vectors_config=vectors_config)

    store = Qdrant(
        client,
        collection_name,
        embeddings=ConsistentFakeEmbeddings(),
        vector_name=vector_name,
    )
    returned = await store.aadd_texts(["abc", "abc"], [{"a": 1}, {"a": 2}])

    assert len(set(returned)) == 2
    assert client.count(collection_name).count == 2
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_aadd_texts_stores_ids(
    batch_size: int, qdrant_location: str
) -> None:
    """Test end to end Qdrant.aadd_texts stores provided ids."""
    from qdrant_client import QdrantClient
    from qdrant_client.http import models as rest

    ids = [
        "fa38d572-4c31-4579-aedc-1960d79df6df",
        "cdc1aa36-d6ab-4fb2-8a94-56674fd27484",
    ]

    client = QdrantClient(location=qdrant_location, api_key=API_KEY)
    collection_name = uuid.uuid4().hex
    client.recreate_collection(
        collection_name,
        vectors_config=rest.VectorParams(size=10, distance=rest.Distance.COSINE),
    )

    vec_store = Qdrant(client, collection_name, ConsistentFakeEmbeddings())
    returned_ids = await vec_store.aadd_texts(
        ["abc", "def"], ids=ids, batch_size=batch_size
    )

    # Compare whole sequences: a zip()-based element check would silently pass
    # if fewer ids were returned than were provided.
    assert ids == list(returned_ids)
    assert 2 == client.count(collection_name).count
    stored_ids = [point.id for point in client.scroll(collection_name)[0]]
    assert set(ids) == set(stored_ids)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", ["custom-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_aadd_texts_stores_embeddings_as_named_vectors(
    vector_name: str, qdrant_location: str
) -> None:
    """Check that Qdrant.aadd_texts writes embeddings under the given vector name."""
    from qdrant_client import QdrantClient
    from qdrant_client.http import models as rest

    collection_name = uuid.uuid4().hex
    named_config = {
        vector_name: rest.VectorParams(size=10, distance=rest.Distance.COSINE)
    }

    client = QdrantClient(location=qdrant_location, api_key=API_KEY)
    client.recreate_collection(collection_name, vectors_config=named_config)

    store = Qdrant(
        client,
        collection_name,
        ConsistentFakeEmbeddings(),
        vector_name=vector_name,
    )
    await store.aadd_texts(["lorem", "ipsum", "dolor", "sit", "amet"])

    assert client.count(collection_name).count == 5
    # Every stored point must carry a vector under the requested name.
    stored_points = client.scroll(collection_name, with_vectors=True)[0]
    assert all(
        vector_name in point.vector  # type: ignore[operator]
        for point in stored_points
    )
|
@ -0,0 +1,266 @@
|
||||
import os
|
||||
import uuid
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_qdrant import Qdrant
|
||||
from langchain_qdrant.vectorstores import QdrantException
|
||||
from tests.integration_tests.common import (
|
||||
ConsistentFakeEmbeddings,
|
||||
assert_documents_equals,
|
||||
)
|
||||
from tests.integration_tests.fixtures import (
|
||||
qdrant_locations,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_from_texts_stores_duplicated_texts(qdrant_location: str) -> None:
    """Check that Qdrant.afrom_texts keeps identical texts as separate points."""
    collection_name = uuid.uuid4().hex

    store = await Qdrant.afrom_texts(
        ["abc", "abc"],
        ConsistentFakeEmbeddings(),
        collection_name=collection_name,
        location=qdrant_location,
    )

    # Both copies of the text must have been stored.
    assert store.client.count(collection_name).count == 2
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_from_texts_stores_ids(
    batch_size: int, vector_name: Optional[str], qdrant_location: str
) -> None:
    """Check that Qdrant.afrom_texts stores points under the ids it was given."""
    collection_name = uuid.uuid4().hex
    ids = [
        "fa38d572-4c31-4579-aedc-1960d79df6df",
        "cdc1aa36-d6ab-4fb2-8a94-56674fd27484",
    ]

    store = await Qdrant.afrom_texts(
        ["abc", "def"],
        ConsistentFakeEmbeddings(),
        ids=ids,
        collection_name=collection_name,
        batch_size=batch_size,
        vector_name=vector_name,
        location=qdrant_location,
    )
    client = store.client

    assert client.count(collection_name).count == 2
    # The ids read back from the collection must be exactly the provided ones.
    scrolled_points = client.scroll(collection_name)[0]
    stored_ids = [point.id for point in scrolled_points]
    assert set(ids) == set(stored_ids)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", ["custom-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_from_texts_stores_embeddings_as_named_vectors(
    vector_name: str,
    qdrant_location: str,
) -> None:
    """Check that Qdrant.afrom_texts writes embeddings under the given vector name."""
    collection_name = uuid.uuid4().hex

    store = await Qdrant.afrom_texts(
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        ConsistentFakeEmbeddings(),
        collection_name=collection_name,
        vector_name=vector_name,
        location=qdrant_location,
    )
    client = store.client

    assert client.count(collection_name).count == 5
    # Every stored point must carry a vector under the requested name.
    stored_points = client.scroll(collection_name, with_vectors=True)[0]
    assert all(
        vector_name in point.vector  # type: ignore[operator]
        for point in stored_points
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations(use_in_memory=False))
@pytest.mark.parametrize("vector_name", [None, "custom-vector"])
async def test_qdrant_from_texts_reuses_same_collection(
    location: str, vector_name: Optional[str]
) -> None:
    """Check that a second Qdrant.afrom_texts call appends to the same collection."""
    embeddings = ConsistentFakeEmbeddings()
    collection_name = uuid.uuid4().hex

    # First call: five texts go into the collection.
    await Qdrant.afrom_texts(
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        embeddings,
        collection_name=collection_name,
        vector_name=vector_name,
        location=location,
    )

    # Second call targets the same collection with two more texts.
    store = await Qdrant.afrom_texts(
        ["foo", "bar"],
        embeddings,
        collection_name=collection_name,
        vector_name=vector_name,
        location=location,
    )

    # 5 + 2 points are expected in total.
    assert store.client.count(collection_name).count == 7
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations(use_in_memory=False))
@pytest.mark.parametrize("vector_name", [None, "custom-vector"])
async def test_qdrant_from_texts_raises_error_on_different_dimensionality(
    location: str,
    vector_name: Optional[str],
) -> None:
    """Check that Qdrant.afrom_texts raises when the vector size does not match
    the existing collection."""
    collection_name = uuid.uuid4().hex
    initial_embeddings = ConsistentFakeEmbeddings(dimensionality=10)

    await Qdrant.afrom_texts(
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        initial_embeddings,
        collection_name=collection_name,
        vector_name=vector_name,
        location=location,
    )

    # Same collection, but 5-dimensional embeddings instead of 10.
    mismatched_embeddings = ConsistentFakeEmbeddings(dimensionality=5)
    with pytest.raises(QdrantException):
        await Qdrant.afrom_texts(
            ["foo", "bar"],
            mismatched_embeddings,
            collection_name=collection_name,
            vector_name=vector_name,
            location=location,
        )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations(use_in_memory=False))
@pytest.mark.parametrize(
    ["first_vector_name", "second_vector_name"],
    [
        (None, "custom-vector"),
        ("custom-vector", None),
        ("my-first-vector", "my-second_vector"),
    ],
)
async def test_qdrant_from_texts_raises_error_on_different_vector_name(
    location: str,
    first_vector_name: Optional[str],
    second_vector_name: Optional[str],
) -> None:
    """Check that Qdrant.afrom_texts raises when the vector name does not match
    the existing collection."""
    collection_name = uuid.uuid4().hex
    initial_embeddings = ConsistentFakeEmbeddings(dimensionality=10)

    await Qdrant.afrom_texts(
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        initial_embeddings,
        collection_name=collection_name,
        vector_name=first_vector_name,
        location=location,
    )

    # Reusing the collection with the second vector name must be rejected.
    second_embeddings = ConsistentFakeEmbeddings(dimensionality=5)
    with pytest.raises(QdrantException):
        await Qdrant.afrom_texts(
            ["foo", "bar"],
            second_embeddings,
            collection_name=collection_name,
            vector_name=second_vector_name,
            location=location,
        )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations(use_in_memory=False))
async def test_qdrant_from_texts_raises_error_on_different_distance(
    location: str,
) -> None:
    """Test if Qdrant.afrom_texts raises an exception if distance does not match.

    Both calls use the same dimensionality so that the distance function is the
    only setting that differs between them.
    """
    collection_name = uuid.uuid4().hex

    await Qdrant.afrom_texts(
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        ConsistentFakeEmbeddings(dimensionality=10),
        collection_name=collection_name,
        distance_func="Cosine",
        location=location,
    )

    with pytest.raises(QdrantException):
        await Qdrant.afrom_texts(
            ["foo", "bar"],
            # Keep the vector size identical to the first call; otherwise a
            # size mismatch could raise instead of the distance mismatch.
            ConsistentFakeEmbeddings(dimensionality=10),
            collection_name=collection_name,
            distance_func="Euclid",
            location=location,
        )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations(use_in_memory=False))
@pytest.mark.parametrize("vector_name", [None, "custom-vector"])
async def test_qdrant_from_texts_recreates_collection_on_force_recreate(
    location: str,
    vector_name: Optional[str],
) -> None:
    """Check that force_recreate lets Qdrant.afrom_texts replace a collection
    whose configuration does not match."""
    from qdrant_client import QdrantClient

    collection_name = uuid.uuid4().hex

    # Initial collection: five texts with 10-dimensional embeddings.
    await Qdrant.afrom_texts(
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        ConsistentFakeEmbeddings(dimensionality=10),
        collection_name=collection_name,
        vector_name=vector_name,
        location=location,
    )

    # Same collection name, 5-dimensional embeddings, recreation forced.
    await Qdrant.afrom_texts(
        ["foo", "bar"],
        ConsistentFakeEmbeddings(dimensionality=5),
        collection_name=collection_name,
        vector_name=vector_name,
        force_recreate=True,
        location=location,
    )

    # Inspect the result through a separate client.
    api_key = os.getenv("QDRANT_API_KEY")
    client = QdrantClient(location=location, api_key=api_key)
    assert client.count(collection_name).count == 2

    vector_params = client.get_collection(collection_name).config.params.vectors
    if vector_name is not None:
        vector_params = vector_params[vector_name]  # type: ignore[index]
    assert vector_params.size == 5  # type: ignore[union-attr]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_from_texts_stores_metadatas(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    qdrant_location: str,
) -> None:
    """Check that metadata passed to Qdrant.afrom_texts comes back from a search."""
    texts = ["foo", "bar", "baz"]
    # One metadata dict per text, tagging it with its position.
    metadatas = [{"page": page} for page in range(len(texts))]

    store = await Qdrant.afrom_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        location=qdrant_location,
    )

    hits = await store.asimilarity_search("foo", k=1)
    expected = [Document(page_content="foo", metadata={"page": 0})]
    assert_documents_equals(hits, expected)
|
@ -0,0 +1,51 @@
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_qdrant import Qdrant
|
||||
from tests.integration_tests.common import (
|
||||
ConsistentFakeEmbeddings,
|
||||
assert_documents_equals,
|
||||
)
|
||||
from tests.integration_tests.fixtures import (
|
||||
qdrant_locations,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "test_content"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "test_metadata"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_max_marginal_relevance_search(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
    qdrant_location: str,
) -> None:
    """Check end-to-end construction followed by an async MMR search."""
    texts = ["foo", "bar", "baz"]
    # One metadata dict per text, tagging it with its position.
    metadatas = [{"page": page} for page in range(len(texts))]

    store = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
        location=qdrant_location,
        distance_func="EUCLID",  # Euclid distance used to avoid normalization
    )

    hits = await store.amax_marginal_relevance_search(
        "foo", k=2, fetch_k=3, lambda_mult=0.0
    )

    expected = [
        Document(page_content="foo", metadata={"page": 0}),
        Document(page_content="baz", metadata={"page": 2}),
    ]
    assert_documents_equals(hits, expected)
|
@ -0,0 +1,305 @@
|
||||
from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_qdrant import Qdrant
|
||||
from tests.integration_tests.common import (
|
||||
ConsistentFakeEmbeddings,
|
||||
assert_documents_equals,
|
||||
)
|
||||
from tests.integration_tests.fixtures import qdrant_locations
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_similarity_search(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
    qdrant_location: str,
) -> None:
    """Check end-to-end construction followed by an async text search."""
    store = Qdrant.from_texts(
        ["foo", "bar", "baz"],
        ConsistentFakeEmbeddings(),
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
        location=qdrant_location,
    )

    hits = await store.asimilarity_search("foo", k=1)

    expected = [Document(page_content="foo")]
    assert_documents_equals(hits, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_similarity_search_by_vector(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
    qdrant_location: str,
) -> None:
    """Check end-to-end construction followed by an async search by vector."""
    store = Qdrant.from_texts(
        ["foo", "bar", "baz"],
        ConsistentFakeEmbeddings(),
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
        location=qdrant_location,
    )

    # Search with the embedding of "foo" rather than with the text itself.
    query_vector = ConsistentFakeEmbeddings().embed_query("foo")
    hits = await store.asimilarity_search_by_vector(query_vector, k=1)

    expected = [Document(page_content="foo")]
    assert_documents_equals(hits, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_similarity_search_with_score_by_vector(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
    qdrant_location: str,
) -> None:
    """Search asynchronously by vector and check the single scored match."""
    store = Qdrant.from_texts(
        ["foo", "bar", "baz"],
        ConsistentFakeEmbeddings(),
        location=qdrant_location,
        vector_name=vector_name,
        batch_size=batch_size,
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
    )

    query_vector = ConsistentFakeEmbeddings().embed_query("foo")
    scored_hits = await store.asimilarity_search_with_score_by_vector(
        query_vector, k=1
    )

    # Exactly one (document, score) pair is expected for k=1.
    assert len(scored_hits) == 1
    document, score = scored_hits[0]
    assert_documents_equals([document], [Document(page_content="foo")])
    assert score >= 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_similarity_search_filters(
    batch_size: int, vector_name: Optional[str], qdrant_location: str
) -> None:
    """Search asynchronously with a dict filter over flat and nested metadata."""
    texts = ["foo", "bar", "baz"]
    # Text number i gets page=i plus a nested block derived from i.
    metadatas = [
        {"page": position, "metadata": {"page": position + 1, "pages": [position + 2, -1]}}
        for position, _ in enumerate(texts)
    ]
    store = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        location=qdrant_location,
        vector_name=vector_name,
        batch_size=batch_size,
    )

    # The filter values correspond to the metadata generated for "bar".
    metadata_filter = {"page": 1, "metadata": {"page": 2, "pages": [3]}}
    hits = await store.asimilarity_search("foo", k=1, filter=metadata_filter)

    expected = Document(
        page_content="bar",
        metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
    )
    assert_documents_equals(hits, [expected])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_similarity_search_with_relevance_score_no_threshold(
    vector_name: Optional[str],
    qdrant_location: str,
) -> None:
    """Without a threshold, all k results come back with scores within [0, 1]."""
    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": position, "metadata": {"page": position + 1, "pages": [position + 2, -1]}}
        for position, _ in enumerate(texts)
    ]
    store = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        location=qdrant_location,
        vector_name=vector_name,
    )

    scored_hits = await store.asimilarity_search_with_relevance_scores(
        "foo", k=3, score_threshold=None
    )

    assert len(scored_hits) == 3
    for _, relevance in scored_hits:
        # Scores are rounded to two decimals before the range check.
        assert 0 <= round(relevance, 2) <= 1
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_similarity_search_with_relevance_score_with_threshold(
    vector_name: Optional[str],
    qdrant_location: str,
) -> None:
    """With a 0.98 threshold, exactly one result is expected to survive."""
    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": position, "metadata": {"page": position + 1, "pages": [position + 2, -1]}}
        for position, _ in enumerate(texts)
    ]
    store = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        location=qdrant_location,
        vector_name=vector_name,
    )

    score_threshold = 0.98
    scored_hits = await store.asimilarity_search_with_relevance_scores(
        "foo", k=3, score_threshold=score_threshold
    )

    assert len(scored_hits) == 1
    assert all(relevance >= score_threshold for _, relevance in scored_hits)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_similarity_search_with_relevance_score_with_threshold_and_filter(
    vector_name: Optional[str],
    qdrant_location: str,
) -> None:
    """Combine a score threshold with a metadata filter on the async search path.

    Both the non-matching and the matching case go through
    ``asimilarity_search_with_relevance_scores`` so that the async
    implementation is the one under test in both branches.
    """
    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
        for i, _ in enumerate(texts)
    ]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        vector_name=vector_name,
        location=qdrant_location,
    )
    score_threshold = 0.99  # for almost exact match

    # Negative case: the filter values are the metadata generated for "bar";
    # the test expects that document not to reach the threshold for "foo".
    negative_filter = {"page": 1, "metadata": {"page": 2, "pages": [3]}}
    output = await docsearch.asimilarity_search_with_relevance_scores(
        "foo", k=3, filter=negative_filter, score_threshold=score_threshold
    )
    assert len(output) == 0

    # Positive case: the filter values are the metadata generated for "foo".
    positive_filter = {"page": 0, "metadata": {"page": 1, "pages": [2]}}
    output = await docsearch.asimilarity_search_with_relevance_scores(
        "foo", k=3, filter=positive_filter, score_threshold=score_threshold
    )
    assert len(output) == 1
    assert all(score >= score_threshold for _, score in output)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_similarity_search_filters_with_qdrant_filters(
    vector_name: Optional[str],
    qdrant_location: str,
) -> None:
    """Search asynchronously with a native ``rest.Filter`` instead of a dict."""
    from qdrant_client.http import models as rest

    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": position, "details": {"page": position + 1, "pages": [position + 2, -1]}}
        for position, _ in enumerate(texts)
    ]
    store = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        location=qdrant_location,
        vector_name=vector_name,
    )

    # Keys are written as dotted payload paths prefixed with "metadata.".
    conditions = [
        rest.FieldCondition(
            key="metadata.page",
            match=rest.MatchValue(value=1),
        ),
        rest.FieldCondition(
            key="metadata.details.page",
            match=rest.MatchValue(value=2),
        ),
        rest.FieldCondition(
            key="metadata.details.pages",
            match=rest.MatchAny(any=[3]),
        ),
    ]
    qdrant_filter = rest.Filter(must=conditions)

    hits = await store.asimilarity_search("foo", k=1, filter=qdrant_filter)

    expected = Document(
        page_content="bar",
        metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
    )
    assert_documents_equals(hits, [expected])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_similarity_search_with_relevance_scores(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
    qdrant_location: str,
) -> None:
    """Check that async relevance scores stay within the [0, 1] range.

    ``vector_name`` is ``Optional[str]`` because the parametrization above
    includes ``None`` (the unnamed default vector).
    """
    texts = ["foo", "bar", "baz"]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
        location=qdrant_location,
    )
    output = await docsearch.asimilarity_search_with_relevance_scores("foo", k=3)

    # np.isclose tolerates scores that exceed 1 only by floating-point error.
    assert all(
        (1 >= score or np.isclose(score, 1)) and score >= 0 for _, score in output
    )
|
@ -0,0 +1,79 @@
|
||||
from typing import List
|
||||
|
||||
import requests # type: ignore
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.embeddings import Embeddings
|
||||
|
||||
|
||||
def qdrant_running_locally() -> bool:
    """Check if Qdrant is running at http://localhost:6333.

    Returns:
        True only when the endpoint answers with a JSON object whose
        ``title`` is ``"qdrant - vector search engine"``. False when the
        request fails, or when something else answers on that port (a
        non-JSON body or a JSON value that is not an object).
    """
    try:
        response = requests.get("http://localhost:6333", timeout=10.0)
        payload = response.json()
    except (requests.exceptions.RequestException, ValueError):
        # RequestException covers connection errors and timeouts; ValueError
        # covers JSON decoding failures. This function runs while test modules
        # are being imported, so it must never raise for an unrelated service.
        return False

    return (
        isinstance(payload, dict)
        and payload.get("title") == "qdrant - vector search engine"
    )
|
||||
|
||||
|
||||
def assert_documents_equals(
    actual: List[Document], expected: List[Document]
) -> None:
    """Assert that two document lists match, ignoring store-added metadata.

    Every actual document must carry ``_id`` and ``_collection_name`` in its
    metadata; those two keys are then left out of the metadata comparison.
    The documents passed in are not modified.

    Args:
        actual: Documents returned by the vector store.
        expected: Documents with the expected content and user metadata.

    Raises:
        AssertionError: If lengths, contents or metadata differ, or if a
            store-added key is missing from an actual document.
    """
    ignored_keys = ("_id", "_collection_name")

    assert len(actual) == len(expected)

    for actual_doc, expected_doc in zip(actual, expected):
        assert actual_doc.page_content == expected_doc.page_content

        for key in ignored_keys:
            assert key in actual_doc.metadata

        # Compare a filtered copy rather than popping keys from the caller's
        # metadata, so the same documents can be checked more than once.
        comparable_metadata = {
            key: value
            for key, value in actual_doc.metadata.items()
            if key not in ignored_keys
        }
        assert comparable_metadata == expected_doc.metadata
|
||||
|
||||
|
||||
class FakeEmbeddings(Embeddings):
    """Deterministic stand-in for an embedding model, intended for tests."""

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed every text as nine ones followed by its position in ``texts``.

        The content of a text is ignored; only its index matters.
        """
        vectors = []
        for position, _ in enumerate(texts):
            vectors.append([1.0] * 9 + [float(position)])
        return vectors

    async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
        """Async variant; delegates to ``embed_documents``."""
        return self.embed_documents(texts)

    def embed_query(self, text: str) -> List[float]:
        """Return the same vector for any query: nine ones followed by zero.

        This equals the vector ``embed_documents`` produces for the first
        text of a batch.
        """
        return [1.0] * 9 + [0.0]

    async def aembed_query(self, text: str) -> List[float]:
        """Async variant; delegates to ``embed_query``."""
        return self.embed_query(text)
|
||||
|
||||
|
||||
class ConsistentFakeEmbeddings(FakeEmbeddings):
    """Fake embeddings which remember all the texts seen so far to return consistent
    vectors for the same texts.

    Each distinct text is assigned the index at which it was first seen by
    this instance; its vector is ``dimensionality - 1`` ones followed by that
    index. Separate instances keep separate registries.
    """

    def __init__(self, dimensionality: int = 10) -> None:
        # Position in this list is the value stored in a text's last component.
        self.known_texts: List[str] = []
        self.dimensionality = dimensionality

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return consistent embeddings for each text, registering unseen ones."""
        out_vectors = []
        for text in texts:
            if text not in self.known_texts:
                self.known_texts.append(text)
            vector = [1.0] * (self.dimensionality - 1) + [
                float(self.known_texts.index(text))
            ]
            out_vectors.append(vector)
        return out_vectors

    def embed_query(self, text: str) -> List[float]:
        """Return the embedding of ``text`` exactly as ``embed_documents`` would.

        A text seen before gets its previously assigned vector. An unseen
        text is registered first and receives the next free index; it does
        not get a constant vector.
        """
        return self.embed_documents([text])[0]
|
@ -0,0 +1,15 @@
|
||||
import os
|
||||
|
||||
from qdrant_client import QdrantClient
|
||||
|
||||
from tests.integration_tests.fixtures import qdrant_locations
|
||||
|
||||
|
||||
def pytest_sessionfinish() -> None:
    """Delete every collection found at each location from ``qdrant_locations()``."""
    for location in qdrant_locations():
        api_key = os.getenv("QDRANT_API_KEY")
        client = QdrantClient(location=location, api_key=api_key)

        for collection in client.get_collections().collections:
            client.delete_collection(collection.name)
|
@ -0,0 +1,25 @@
|
||||
import logging
|
||||
import os
|
||||
from typing import List
|
||||
|
||||
from tests.integration_tests.common import qdrant_running_locally
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def qdrant_locations(use_in_memory: bool = True) -> List[str]:
    """List the Qdrant locations the integration tests should run against.

    Args:
        use_in_memory: Whether to include the ``":memory:"`` location.

    Returns:
        In order: ``":memory:"`` (if requested), ``"http://localhost:6333"``
        (if ``qdrant_running_locally()`` is true), and the value of the
        ``QDRANT_URL`` environment variable (if set and non-empty).
    """
    found: List[str] = []

    if use_in_memory:
        logger.info("Running Qdrant tests with in-memory mode.")
        found.append(":memory:")

    if qdrant_running_locally():
        logger.info("Running Qdrant tests with local Qdrant instance.")
        found.append("http://localhost:6333")

    qdrant_url = os.getenv("QDRANT_URL")
    if qdrant_url:
        logger.info(f"Running Qdrant tests with Qdrant instance at {qdrant_url}.")
        found.append(qdrant_url)

    return found
|
@ -0,0 +1,135 @@
|
||||
import uuid
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_qdrant import Qdrant
|
||||
from tests.integration_tests.common import (
|
||||
ConsistentFakeEmbeddings,
|
||||
assert_documents_equals,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_add_documents_extends_existing_collection(
    batch_size: int, vector_name: Optional[str]
) -> None:
    """Test that add_documents extends a collection created by from_texts."""
    texts = ["foo", "bar", "baz"]
    docsearch: Qdrant = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        location=":memory:",
        batch_size=batch_size,
        vector_name=vector_name,
    )

    new_texts = ["foobar", "foobaz"]
    docsearch.add_documents(
        [Document(page_content=content) for content in new_texts], batch_size=batch_size
    )
    output = docsearch.similarity_search("foobar", k=1)
    # ConsistentFakeEmbeddings gives every distinct text its own vector and
    # returns that same vector again when the text is used as a query. The
    # query "foobar" is therefore expected to retrieve the newly added
    # "foobar" document rather than one of the initial texts.
    assert_documents_equals(output, [Document(page_content="foobar")])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
def test_qdrant_add_texts_returns_all_ids(batch_size: int) -> None:
    """Check that Qdrant.add_texts returns one distinct id per added text."""
    store: Qdrant = Qdrant.from_texts(
        ["foobar"],
        ConsistentFakeEmbeddings(),
        location=":memory:",
        batch_size=batch_size,
    )

    added_ids = store.add_texts(["foo", "bar", "baz"])

    assert len(added_ids) == 3
    # Collapsing to a set must not lose any id, i.e. all ids are unique.
    assert len(set(added_ids)) == 3
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_add_texts_stores_duplicated_texts(vector_name: Optional[str]) -> None:
    """Check that Qdrant.add_texts keeps two identical texts as two points."""
    from qdrant_client import QdrantClient
    from qdrant_client.http import models as rest

    client = QdrantClient(":memory:")
    collection_name = uuid.uuid4().hex

    vectors_config = rest.VectorParams(size=10, distance=rest.Distance.COSINE)
    if vector_name is not None:
        # A named vector is configured as a {name: params} mapping.
        vectors_config = {vector_name: vectors_config}  # type: ignore[assignment]
    client.recreate_collection(collection_name, vectors_config=vectors_config)

    store = Qdrant(
        client,
        collection_name,
        embeddings=ConsistentFakeEmbeddings(),
        vector_name=vector_name,
    )
    added_ids = store.add_texts(["abc", "abc"], [{"a": 1}, {"a": 2}])

    assert len(set(added_ids)) == 2
    assert client.count(collection_name).count == 2
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
def test_qdrant_add_texts_stores_ids(batch_size: int) -> None:
    """Test end to end Qdrant.add_texts stores provided ids."""
    from qdrant_client import QdrantClient
    from qdrant_client.http import models as rest

    ids = [
        "fa38d572-4c31-4579-aedc-1960d79df6df",
        "cdc1aa36-d6ab-4fb2-8a94-56674fd27484",
    ]

    client = QdrantClient(":memory:")
    collection_name = uuid.uuid4().hex
    client.recreate_collection(
        collection_name,
        vectors_config=rest.VectorParams(size=10, distance=rest.Distance.COSINE),
    )

    vec_store = Qdrant(client, collection_name, ConsistentFakeEmbeddings())
    returned_ids = vec_store.add_texts(["abc", "def"], ids=ids, batch_size=batch_size)

    # Compare whole lists: a pairwise check over zip() would silently pass if
    # fewer ids (or none at all) were returned.
    assert ids == list(returned_ids)
    assert 2 == client.count(collection_name).count
    stored_ids = [point.id for point in client.scroll(collection_name)[0]]
    assert set(ids) == set(stored_ids)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", ["custom-vector"])
def test_qdrant_add_texts_stores_embeddings_as_named_vectors(vector_name: str) -> None:
    """Check that Qdrant.add_texts stores vectors under the configured name."""
    from qdrant_client import QdrantClient
    from qdrant_client.http import models as rest

    collection_name = uuid.uuid4().hex

    client = QdrantClient(":memory:")
    named_config = {
        vector_name: rest.VectorParams(size=10, distance=rest.Distance.COSINE)
    }
    client.recreate_collection(collection_name, vectors_config=named_config)

    store = Qdrant(
        client,
        collection_name,
        ConsistentFakeEmbeddings(),
        vector_name=vector_name,
    )
    store.add_texts(["lorem", "ipsum", "dolor", "sit", "amet"])

    assert client.count(collection_name).count == 5

    # scroll() returns a tuple whose first element is the list of points.
    points = client.scroll(collection_name, with_vectors=True)[0]
    assert all(
        vector_name in point.vector  # type: ignore[operator]
        for point in points
    )
|
@ -0,0 +1,7 @@
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.compile
def test_placeholder() -> None:
    """No-op test selected by the ``compile`` marker.

    It lets the integration test modules be imported and collected without
    running any real test.
    """
|
@ -0,0 +1,58 @@
|
||||
import uuid
|
||||
from typing import Callable, Optional
|
||||
|
||||
import pytest
|
||||
from langchain_core.embeddings import Embeddings
|
||||
|
||||
from langchain_qdrant import Qdrant
|
||||
from tests.integration_tests.common import ConsistentFakeEmbeddings
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ["embeddings", "embedding_function"],
    [
        (ConsistentFakeEmbeddings(), None),
        (ConsistentFakeEmbeddings().embed_query, None),
        (None, ConsistentFakeEmbeddings().embed_query),
    ],
)
def test_qdrant_embedding_interface(
    embeddings: Optional[Embeddings], embedding_function: Optional[Callable]
) -> None:
    """Check that Qdrant can be constructed with each supported embedding input.

    The cases cover an embeddings object, a callable passed as
    ``embeddings``, and a callable passed as ``embedding_function``.
    Construction completing without an exception is the whole test.
    """
    from qdrant_client import QdrantClient

    Qdrant(
        QdrantClient(":memory:"),
        uuid.uuid4().hex,
        embeddings=embeddings,
        embedding_function=embedding_function,
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ["embeddings", "embedding_function"],
    [
        (ConsistentFakeEmbeddings(), ConsistentFakeEmbeddings().embed_query),
        (None, None),
    ],
)
def test_qdrant_embedding_interface_raises_value_error(
    embeddings: Optional[Embeddings], embedding_function: Optional[Callable]
) -> None:
    """Check that supplying both embedding inputs, or neither, is rejected."""
    from qdrant_client import QdrantClient

    # Built outside the raises block so that only the Qdrant constructor can
    # satisfy the ValueError expectation.
    memory_client = QdrantClient(":memory:")
    name = uuid.uuid4().hex

    with pytest.raises(ValueError):
        Qdrant(
            memory_client,
            name,
            embeddings=embeddings,
            embedding_function=embedding_function,
        )
|
@ -0,0 +1,37 @@
|
||||
import tempfile
|
||||
import uuid
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain_qdrant import Qdrant
|
||||
from tests.integration_tests.common import ConsistentFakeEmbeddings
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", ["custom-vector"])
def test_qdrant_from_existing_collection_uses_same_collection(vector_name: str) -> None:
    """Check that Qdrant.from_existing_collection adds to the existing collection."""
    from qdrant_client import QdrantClient

    collection_name = uuid.uuid4().hex
    with tempfile.TemporaryDirectory() as tmpdir:
        storage_path = str(tmpdir)

        # First store: creates the collection with a single text.
        store = Qdrant.from_texts(
            ["foo"],
            embedding=ConsistentFakeEmbeddings(),
            path=storage_path,
            collection_name=collection_name,
            vector_name=vector_name,
        )
        del store

        # Second store: attaches to the same path and collection name and
        # adds two more texts.
        store = Qdrant.from_existing_collection(
            embedding=ConsistentFakeEmbeddings(),
            path=storage_path,
            collection_name=collection_name,
            vector_name=vector_name,
        )
        store.add_texts(["baz", "bar"])
        del store

        client = QdrantClient(path=storage_path)
        assert client.count(collection_name).count == 3
|
@ -0,0 +1,288 @@
|
||||
import tempfile
|
||||
import uuid
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_qdrant import Qdrant
|
||||
from langchain_qdrant.vectorstores import QdrantException
|
||||
from tests.integration_tests.common import (
|
||||
ConsistentFakeEmbeddings,
|
||||
assert_documents_equals,
|
||||
)
|
||||
from tests.integration_tests.fixtures import qdrant_locations
|
||||
|
||||
|
||||
def test_qdrant_from_texts_stores_duplicated_texts() -> None:
    """Check that Qdrant.from_texts keeps two identical texts as two points."""
    from qdrant_client import QdrantClient

    collection_name = uuid.uuid4().hex

    with tempfile.TemporaryDirectory() as tmpdir:
        storage_path = str(tmpdir)
        store = Qdrant.from_texts(
            ["abc", "abc"],
            ConsistentFakeEmbeddings(),
            collection_name=collection_name,
            path=storage_path,
        )
        # Drop the store before a second client opens the same path.
        del store

        client = QdrantClient(path=storage_path)
        assert client.count(collection_name).count == 2
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_from_texts_stores_ids(
    batch_size: int, vector_name: Optional[str]
) -> None:
    """Check that Qdrant.from_texts persists points under the ids it is given."""
    from qdrant_client import QdrantClient

    collection_name = uuid.uuid4().hex
    with tempfile.TemporaryDirectory() as tmpdir:
        storage_path = str(tmpdir)
        ids = [
            "fa38d572-4c31-4579-aedc-1960d79df6df",
            "cdc1aa36-d6ab-4fb2-8a94-56674fd27484",
        ]
        store = Qdrant.from_texts(
            ["abc", "def"],
            ConsistentFakeEmbeddings(),
            ids=ids,
            collection_name=collection_name,
            path=storage_path,
            batch_size=batch_size,
            vector_name=vector_name,
        )
        del store

        client = QdrantClient(path=storage_path)
        assert client.count(collection_name).count == 2

        # scroll() returns a tuple whose first element is the list of points.
        points = client.scroll(collection_name)[0]
        stored_ids = [point.id for point in points]
        assert set(ids) == set(stored_ids)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", ["custom-vector"])
def test_qdrant_from_texts_stores_embeddings_as_named_vectors(vector_name: str) -> None:
    """Check that Qdrant.from_texts stores vectors under the requested name."""
    from qdrant_client import QdrantClient

    collection_name = uuid.uuid4().hex
    with tempfile.TemporaryDirectory() as tmpdir:
        storage_path = str(tmpdir)
        store = Qdrant.from_texts(
            ["lorem", "ipsum", "dolor", "sit", "amet"],
            ConsistentFakeEmbeddings(),
            collection_name=collection_name,
            path=storage_path,
            vector_name=vector_name,
        )
        del store

        client = QdrantClient(path=storage_path)
        assert client.count(collection_name).count == 5

        points = client.scroll(collection_name, with_vectors=True)[0]
        assert all(
            vector_name in point.vector  # type: ignore[operator]
            for point in points
        )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "custom-vector"])
def test_qdrant_from_texts_reuses_same_collection(vector_name: Optional[str]) -> None:
    """Check that a second Qdrant.from_texts call adds to the same collection."""
    from qdrant_client import QdrantClient

    collection_name = uuid.uuid4().hex
    embeddings = ConsistentFakeEmbeddings()
    with tempfile.TemporaryDirectory() as tmpdir:
        storage_path = str(tmpdir)

        # Five texts on the first call, two on the second: seven in total.
        store = Qdrant.from_texts(
            ["lorem", "ipsum", "dolor", "sit", "amet"],
            embeddings,
            collection_name=collection_name,
            path=storage_path,
            vector_name=vector_name,
        )
        del store

        store = Qdrant.from_texts(
            ["foo", "bar"],
            embeddings,
            collection_name=collection_name,
            path=storage_path,
            vector_name=vector_name,
        )
        del store

        client = QdrantClient(path=storage_path)
        assert client.count(collection_name).count == 7
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "custom-vector"])
def test_qdrant_from_texts_raises_error_on_different_dimensionality(
    vector_name: Optional[str],
) -> None:
    """Check that reusing a collection with another vector size is rejected."""
    collection_name = uuid.uuid4().hex
    with tempfile.TemporaryDirectory() as tmpdir:
        storage_path = str(tmpdir)

        # The collection is created with 10-dimensional vectors ...
        store = Qdrant.from_texts(
            ["lorem", "ipsum", "dolor", "sit", "amet"],
            ConsistentFakeEmbeddings(dimensionality=10),
            collection_name=collection_name,
            path=storage_path,
            vector_name=vector_name,
        )
        del store

        # ... so a second call with 5-dimensional vectors must fail.
        with pytest.raises(QdrantException):
            Qdrant.from_texts(
                ["foo", "bar"],
                ConsistentFakeEmbeddings(dimensionality=5),
                collection_name=collection_name,
                path=storage_path,
                vector_name=vector_name,
            )
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ["first_vector_name", "second_vector_name"],
    [
        (None, "custom-vector"),
        ("custom-vector", None),
        ("my-first-vector", "my-second_vector"),
    ],
)
def test_qdrant_from_texts_raises_error_on_different_vector_name(
    first_vector_name: Optional[str],
    second_vector_name: Optional[str],
) -> None:
    """Test if Qdrant.from_texts raises an exception if vector name does not match.

    Both calls use the same dimensionality so that the vector name is the
    only difference between them; the expected exception can then only come
    from the vector-name mismatch and not from a vector-size mismatch.
    """
    collection_name = uuid.uuid4().hex
    with tempfile.TemporaryDirectory() as tmpdir:
        vec_store = Qdrant.from_texts(
            ["lorem", "ipsum", "dolor", "sit", "amet"],
            ConsistentFakeEmbeddings(dimensionality=10),
            collection_name=collection_name,
            path=str(tmpdir),
            vector_name=first_vector_name,
        )
        del vec_store

        with pytest.raises(QdrantException):
            Qdrant.from_texts(
                ["foo", "bar"],
                ConsistentFakeEmbeddings(dimensionality=10),
                collection_name=collection_name,
                path=str(tmpdir),
                vector_name=second_vector_name,
            )
|
||||
|
||||
|
||||
def test_qdrant_from_texts_raises_error_on_different_distance() -> None:
    """Check that reusing a collection with another distance function is rejected."""
    collection_name = uuid.uuid4().hex
    with tempfile.TemporaryDirectory() as tmpdir:
        storage_path = str(tmpdir)

        store = Qdrant.from_texts(
            ["lorem", "ipsum", "dolor", "sit", "amet"],
            ConsistentFakeEmbeddings(),
            collection_name=collection_name,
            path=storage_path,
            distance_func="Cosine",
        )
        del store

        with pytest.raises(QdrantException) as excinfo:
            Qdrant.from_texts(
                ["foo", "bar"],
                ConsistentFakeEmbeddings(),
                collection_name=collection_name,
                path=storage_path,
                distance_func="Euclid",
            )

        # The error must name both distances and say how to fix the call.
        error_text = str(excinfo.value)
        expected_message = (
            "configured for COSINE similarity, but requested EUCLID. Please set "
            "`distance_func` parameter to `COSINE`"
        )
        assert expected_message in error_text
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "custom-vector"])
def test_qdrant_from_texts_recreates_collection_on_force_recreate(
    vector_name: Optional[str],
) -> None:
    """Check that force_recreate replaces a collection whose config mismatches."""
    from qdrant_client import QdrantClient

    collection_name = uuid.uuid4().hex
    with tempfile.TemporaryDirectory() as tmpdir:
        storage_path = str(tmpdir)

        store = Qdrant.from_texts(
            ["lorem", "ipsum", "dolor", "sit", "amet"],
            ConsistentFakeEmbeddings(dimensionality=10),
            collection_name=collection_name,
            path=storage_path,
            vector_name=vector_name,
        )
        del store

        # A different dimensionality is accepted here because of
        # force_recreate=True.
        store = Qdrant.from_texts(
            ["foo", "bar"],
            ConsistentFakeEmbeddings(dimensionality=5),
            collection_name=collection_name,
            path=storage_path,
            vector_name=vector_name,
            force_recreate=True,
        )
        del store

        # Only the two texts from the second call are expected to remain.
        client = QdrantClient(path=storage_path)
        assert client.count(collection_name).count == 2
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
def test_qdrant_from_texts_stores_metadatas(
    batch_size: int, content_payload_key: str, metadata_payload_key: str
) -> None:
    """Check that metadata passed to Qdrant.from_texts comes back from a search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": position} for position, _ in enumerate(texts)]
    store = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        location=":memory:",
        batch_size=batch_size,
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
    )

    hits = store.similarity_search("foo", k=1)

    expected = Document(page_content="foo", metadata={"page": 0})
    assert_documents_equals(hits, [expected])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations(use_in_memory=False))
def test_from_texts_passed_optimizers_config_and_on_disk_payload(location: str) -> None:
    """Check that storage options given to Qdrant.from_texts reach the collection.

    The in-memory location is excluded from the parametrization.
    """
    from qdrant_client import models

    collection_name = uuid.uuid4().hex
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": position} for position, _ in enumerate(texts)]

    store = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        optimizers_config=models.OptimizersConfigDiff(memmap_threshold=1000),
        on_disk_payload=True,
        on_disk=True,
        collection_name=collection_name,
        location=location,
    )

    # Read the configuration back and compare it with what was requested.
    config = store.client.get_collection(collection_name).config
    assert config.params.vectors.on_disk is True  # type: ignore
    assert config.optimizer_config.memmap_threshold == 1000
    assert config.params.on_disk_payload is True
|
@ -0,0 +1,67 @@
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_qdrant import Qdrant
|
||||
from tests.integration_tests.common import (
|
||||
ConsistentFakeEmbeddings,
|
||||
assert_documents_equals,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "test_content"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "test_metadata"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_max_marginal_relevance_search(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
) -> None:
    """Test end to end construction and MMR (max marginal relevance) search.

    Three texts with ``{"page": i}`` metadata are indexed in an in-memory
    collection. The search is run twice with ``lambda_mult=0.0`` (documented by
    LangChain as maximum diversity): once unfiltered and once restricted to
    page 2 by a Qdrant filter.
    """
    from qdrant_client import models

    # Named ``page_filter`` rather than ``filter`` so the builtin is not shadowed.
    # The key is built from the parametrized metadata payload key, so the filter
    # follows whichever payload field the metadata is stored under.
    page_filter = models.Filter(
        must=[
            models.FieldCondition(
                key=f"{metadata_payload_key}.page",
                match=models.MatchValue(
                    value=2,
                ),
            ),
        ],
    )

    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": i} for i in range(len(texts))]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
        distance_func="EUCLID",  # Euclid distance used to avoid normalization
    )

    # Unfiltered: the query text itself plus one further document are expected.
    output = docsearch.max_marginal_relevance_search(
        "foo", k=2, fetch_k=3, lambda_mult=0.0
    )
    assert_documents_equals(
        output,
        [
            Document(page_content="foo", metadata={"page": 0}),
            Document(page_content="baz", metadata={"page": 2}),
        ],
    )

    # Filtered: only the page-2 document may come back even though k=2.
    output = docsearch.max_marginal_relevance_search(
        "foo", k=2, fetch_k=3, lambda_mult=0.0, filter=page_filter
    )
    assert_documents_equals(
        output,
        [Document(page_content="baz", metadata={"page": 2})],
    )
|
@ -0,0 +1,284 @@
|
||||
from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_qdrant import Qdrant
|
||||
from tests.integration_tests.common import (
|
||||
ConsistentFakeEmbeddings,
|
||||
assert_documents_equals,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
) -> None:
    """Index three texts in memory and expect the queried text as the top hit.

    Runs for every combination of batch size, payload keys and vector name
    supplied by the parametrize decorators.
    """
    store = Qdrant.from_texts(
        ["foo", "bar", "baz"],
        ConsistentFakeEmbeddings(),
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
    )

    hits = store.similarity_search("foo", k=1)

    # No metadatas were indexed, so the expected document carries content only.
    expected_hits = [Document(page_content="foo")]
    assert_documents_equals(actual=hits, expected=expected_hits)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_by_vector(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
) -> None:
    """Search with a pre-computed query vector and expect the matching text.

    The query vector for "foo" is produced by a separate
    ``ConsistentFakeEmbeddings`` instance and passed to
    ``similarity_search_by_vector`` instead of the raw query string.
    """
    corpus = ["foo", "bar", "baz"]
    store = Qdrant.from_texts(
        corpus,
        ConsistentFakeEmbeddings(),
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
    )

    query_vector = ConsistentFakeEmbeddings().embed_query("foo")
    hits = store.similarity_search_by_vector(query_vector, k=1)

    expected_hits = [Document(page_content="foo")]
    assert_documents_equals(hits, expected_hits)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_with_score_by_vector(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
) -> None:
    """Search by vector with scores: expect one (document, score) pair.

    The single result must be the "foo" document and its score must be
    non-negative.
    """
    corpus = ["foo", "bar", "baz"]
    store = Qdrant.from_texts(
        corpus,
        ConsistentFakeEmbeddings(),
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
    )

    query_vector = ConsistentFakeEmbeddings().embed_query("foo")
    scored_hits = store.similarity_search_with_score_by_vector(query_vector, k=1)

    # k=1, so exactly one scored pair is expected.
    assert len(scored_hits) == 1
    top_document, top_score = scored_hits[0]
    assert_documents_equals(
        actual=[top_document], expected=[Document(page_content="foo")]
    )
    assert top_score >= 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_filters(
    batch_size: int, vector_name: Optional[str]
) -> None:
    """Search with a nested dict filter and expect only the matching document.

    Each text gets metadata with a top-level ``page`` plus a nested
    ``metadata`` dict. The filter describes the second text ("bar"), so that
    document is expected even though the query string is "foo".
    """
    corpus = ["foo", "bar", "baz"]
    corpus_metadata = [
        {"page": idx, "metadata": {"page": idx + 1, "pages": [idx + 2, -1]}}
        for idx, _ in enumerate(corpus)
    ]
    store = Qdrant.from_texts(
        corpus,
        ConsistentFakeEmbeddings(),
        metadatas=corpus_metadata,
        location=":memory:",
        batch_size=batch_size,
        vector_name=vector_name,
    )

    # The filter lists only ``3`` under "pages" while the stored list is [3, -1].
    dict_filter = {"page": 1, "metadata": {"page": 2, "pages": [3]}}
    hits = store.similarity_search("foo", k=1, filter=dict_filter)

    expected_hit = Document(
        page_content="bar",
        metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
    )
    assert_documents_equals(actual=hits, expected=[expected_hit])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_with_relevance_score_no_threshold(
    vector_name: Optional[str],
) -> None:
    """Relevance-score search without a threshold returns every document.

    With ``score_threshold=None`` and ``k=3`` all three indexed texts must come
    back, and each relevance score, rounded to two decimals, must lie in
    ``[0, 1]``.
    """
    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
        for i in range(len(texts))
    ]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        location=":memory:",
        vector_name=vector_name,
    )
    output = docsearch.similarity_search_with_relevance_scores(
        "foo", k=3, score_threshold=None
    )
    assert len(output) == 3
    # Iterate the (document, score) pairs directly instead of indexing by position.
    for _, score in output:
        # Rounding keeps tiny floating point overshoot from failing the bounds.
        assert 0 <= round(score, 2) <= 1
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_with_relevance_score_with_threshold(
    vector_name: Optional[str],
) -> None:
    """Relevance-score search with a threshold keeps only qualifying hits.

    With ``score_threshold=0.98`` exactly one of the three indexed texts is
    expected for the query "foo", and its score must reach the threshold.
    """
    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
        for i in range(len(texts))
    ]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        location=":memory:",
        vector_name=vector_name,
    )

    score_threshold = 0.98
    # Pass the threshold as a plain keyword; no intermediate kwargs dict needed.
    output = docsearch.similarity_search_with_relevance_scores(
        "foo", k=3, score_threshold=score_threshold
    )
    assert len(output) == 1
    # Generator expression: no throwaway list is built just to feed ``all``.
    assert all(score >= score_threshold for _, score in output)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_with_relevance_score_with_threshold_and_filter(
    vector_name: Optional[str],
) -> None:
    """Combine a score threshold with a metadata filter.

    Two searches for "foo" are run with ``score_threshold=0.99``:

    * a filter describing the second text's metadata must yield no results;
    * a filter describing the first text's metadata must yield exactly one
      result whose score reaches the threshold.
    """
    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
        for i in range(len(texts))
    ]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        location=":memory:",
        vector_name=vector_name,
    )
    score_threshold = 0.99  # for almost exact match

    # Negative case: the filter matches the metadata stored for "bar" (index 1),
    # and the test expects nothing to survive the threshold.
    negative_filter = {"page": 1, "metadata": {"page": 2, "pages": [3]}}
    output = docsearch.similarity_search_with_relevance_scores(
        "foo", k=3, filter=negative_filter, score_threshold=score_threshold
    )
    assert len(output) == 0

    # Positive case: the filter matches the metadata stored for "foo" (index 0).
    positive_filter = {"page": 0, "metadata": {"page": 1, "pages": [2]}}
    output = docsearch.similarity_search_with_relevance_scores(
        "foo", k=3, filter=positive_filter, score_threshold=score_threshold
    )
    assert len(output) == 1
    # Generator expression: no throwaway list is built just to feed ``all``.
    assert all(score >= score_threshold for _, score in output)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_filters_with_qdrant_filters(
    vector_name: Optional[str],
) -> None:
    """Search with a native Qdrant ``Filter`` instead of a dict filter.

    Three field conditions (two exact matches and one match-any) together
    describe the second text ("bar"), which is therefore the expected result
    for the query "foo".
    """
    from qdrant_client.http import models as rest

    corpus = ["foo", "bar", "baz"]
    corpus_metadata = [
        {"page": idx, "details": {"page": idx + 1, "pages": [idx + 2, -1]}}
        for idx, _ in enumerate(corpus)
    ]
    store = Qdrant.from_texts(
        corpus,
        ConsistentFakeEmbeddings(),
        metadatas=corpus_metadata,
        location=":memory:",
        vector_name=vector_name,
    )

    # Keys use the "metadata." prefix - presumably the default metadata payload
    # key, since this test does not override it.
    conditions = [
        rest.FieldCondition(
            key="metadata.page",
            match=rest.MatchValue(value=1),
        ),
        rest.FieldCondition(
            key="metadata.details.page",
            match=rest.MatchValue(value=2),
        ),
        rest.FieldCondition(
            key="metadata.details.pages",
            match=rest.MatchAny(any=[3]),
        ),
    ]
    qdrant_filter = rest.Filter(must=conditions)

    hits = store.similarity_search("foo", k=1, filter=qdrant_filter)

    expected_hit = Document(
        page_content="bar",
        metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
    )
    assert_documents_equals(actual=hits, expected=[expected_hit])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_with_relevance_scores(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
) -> None:
    """Every relevance score returned for the query must lie within [0, 1].

    A score marginally above 1 is tolerated when ``np.isclose`` treats it as
    equal to 1.
    """
    corpus = ["foo", "bar", "baz"]
    store = Qdrant.from_texts(
        corpus,
        ConsistentFakeEmbeddings(),
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
    )

    scored_hits = store.similarity_search_with_relevance_scores("foo", k=3)

    for _, score in scored_hits:
        # Upper bound is checked with a float tolerance; lower bound is strict.
        within_upper_bound = 1 >= score or np.isclose(score, 1)
        assert within_upper_bound and score >= 0
|
||||
)
|
@ -0,0 +1,7 @@
|
||||
from langchain_qdrant import __all__
|
||||
|
||||
# Names the package is expected to expose through ``__all__``.
EXPECTED_ALL = ["Qdrant"]


def test_all_imports() -> None:
    """Ensure ``langchain_qdrant.__all__`` holds exactly the expected names."""
    # Both sides are sorted so the comparison ignores declaration order.
    exported = sorted(__all__)
    assert sorted(EXPECTED_ALL) == exported
|
Loading…
Reference in New Issue