You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
langchain/libs/community/tests/integration_tests/retrievers/document_compressors/test_base.py

30 lines
1.2 KiB
Python

"""Integration test for compression pipelines."""
from langchain.retrievers.document_compressors import (
DocumentCompressorPipeline,
EmbeddingsFilter,
)
from langchain_core.documents import Document
from langchain_text_splitters.character import CharacterTextSplitter
from langchain_community.document_transformers import EmbeddingsRedundantFilter
from langchain_community.embeddings import OpenAIEmbeddings
def test_document_compressor_pipeline() -> None:
    """Exercise a three-stage compressor pipeline end to end.

    A single document is built by joining three sentences with ". ". It is
    passed through a character splitter (same ". " separator), an
    embeddings-based redundancy filter and an embeddings-based relevance
    filter (similarity threshold 0.8), all chained in one
    ``DocumentCompressorPipeline``. For the query about farm animals the
    test expects exactly one document back, and its content must be one of
    the two cow sentences.

    NOTE(review): ``OpenAIEmbeddings()`` is built with no arguments, so this
    presumably needs OpenAI credentials in the environment -- confirm.
    """
    sentences = [
        "This sentence is about cows",
        "This sentence was about cows",
        "foo bar baz",
    ]
    source_docs = [Document(page_content=". ".join(sentences))]

    # One embeddings instance is shared by both embedding-based stages.
    embedder = OpenAIEmbeddings()
    stages = [
        CharacterTextSplitter(chunk_size=20, chunk_overlap=0, separator=". "),
        EmbeddingsRedundantFilter(embeddings=embedder),
        EmbeddingsFilter(embeddings=embedder, similarity_threshold=0.8),
    ]
    pipeline = DocumentCompressorPipeline(transformers=stages)

    compressed = pipeline.compress_documents(
        source_docs, "Tell me about farm animals"
    )

    # Only one of the two cow sentences may remain.
    cow_sentences = sentences[:2]
    assert len(compressed) == 1
    assert compressed[0].page_content in cow_sentences