You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
langchain/libs/core/tests/unit_tests/document_loaders/test_base.py

68 lines
2.0 KiB
Python

"""Test Base Schema of documents."""
from typing import Iterator, List
import pytest
from langchain_core.document_loaders.base import BaseBlobParser, BaseLoader
from langchain_core.document_loaders.blob_loaders import Blob
from langchain_core.documents import Document
def test_base_blob_parser() -> None:
    """Check that the eager ``parse`` is wired to ``lazy_parse`` by default."""

    class SingleDocParser(BaseBlobParser):
        """Parser whose lazy interface emits exactly one document."""

        def lazy_parse(self, blob: Blob) -> Iterator[Document]:
            """Yield one fixed document; the blob itself is not inspected."""
            yield Document(page_content="foo")

    blob_parser = SingleDocParser()

    lazy_result = blob_parser.lazy_parse(Blob(data="who?"))
    assert isinstance(lazy_result, Iterator)

    # Only ``lazy_parse`` is overridden above, so calling ``parse`` here
    # exercises the default eager implementation.
    parsed_docs = blob_parser.parse(Blob(data="who?"))
    assert len(parsed_docs) == 1
    assert parsed_docs[0].page_content == "foo"
def test_default_lazy_load() -> None:
    """Check that ``lazy_load`` yields what an overridden ``load`` returns."""

    class EagerOnlyLoader(BaseLoader):
        """Loader that overrides only the eager ``load`` method."""

        def load(self) -> List[Document]:
            """Return two fixed documents."""
            first = Document(page_content="foo")
            second = Document(page_content="bar")
            return [first, second]

    lazily_loaded = list(EagerOnlyLoader().lazy_load())
    expected = [Document(page_content="foo"), Document(page_content="bar")]
    assert lazily_loaded == expected
def test_lazy_load_not_implemented() -> None:
    """Check that ``lazy_load`` raises when the subclass overrides nothing."""

    class BareLoader(BaseLoader):
        """Loader subclass that defines no methods of its own."""

    bare_loader = BareLoader()
    with pytest.raises(NotImplementedError):
        bare_loader.lazy_load()
async def test_default_aload() -> None:
    """Check ``load``, ``alazy_load`` and ``aload`` against ``lazy_load``.

    The loader below overrides only ``lazy_load``; the other three entry
    points are expected to return the same two documents.
    """

    class LazyOnlyLoader(BaseLoader):
        """Loader that overrides only the lazy ``lazy_load`` method."""

        def lazy_load(self) -> Iterator[Document]:
            """Yield two fixed documents, one per content string."""
            for text in ("foo", "bar"):
                yield Document(page_content=text)

    lazy_only = LazyOnlyLoader()

    eager_docs = lazy_only.load()
    expected = [Document(page_content="foo"), Document(page_content="bar")]
    assert eager_docs == expected

    streamed_docs = [doc async for doc in lazy_only.alazy_load()]
    assert eager_docs == streamed_docs

    awaited_docs = await lazy_only.aload()
    assert eager_docs == awaited_docs