Ndb enterprise (#21233)

Description: Adds NeuralDBClientVectorStore, our enterprise client, to LangChain.

---------

Co-authored-by: kartikTAI <129414343+kartikTAI@users.noreply.github.com>
Co-authored-by: Kartik Sarangmath <kartik@thirdai.com>
pull/21048/head^2
Yash 2 weeks ago committed by GitHub
parent 74044e44a5
commit cb31c3611f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -236,6 +236,7 @@ if TYPE_CHECKING:
TencentVectorDB,
)
from langchain_community.vectorstores.thirdai_neuraldb import (
NeuralDBClientVectorStore,
NeuralDBVectorStore,
)
from langchain_community.vectorstores.tidb_vector import (
@ -345,6 +346,7 @@ __all__ = [
"MyScale",
"MyScaleSettings",
"Neo4jVector",
"NeuralDBClientVectorStore",
"NeuralDBVectorStore",
"OracleVS",
"OpenSearchVectorSearch",
@ -441,6 +443,7 @@ _module_lookup = {
"MyScale": "langchain_community.vectorstores.myscale",
"MyScaleSettings": "langchain_community.vectorstores.myscale",
"Neo4jVector": "langchain_community.vectorstores.neo4j_vector",
"NeuralDBClientVectorStore": "langchain_community.vectorstores.thirdai_neuraldb", # noqa: E501
"NeuralDBVectorStore": "langchain_community.vectorstores.thirdai_neuraldb",
"OpenSearchVectorSearch": "langchain_community.vectorstores.opensearch_vector_search", # noqa: E501
"OracleVS": "langchain_community.vectorstores.oraclevs",

@ -166,7 +166,7 @@ class NeuralDBVectorStore(VectorStore):
offset = self.db._savable_state.documents.get_source_by_id(source_id)[1]
return [str(offset + i) for i in range(len(texts))] # type: ignore[arg-type]
@root_validator()
@root_validator(allow_reuse=True)
def validate_environments(cls, values: Dict) -> Dict:
"""Validate ThirdAI environment variables."""
values["thirdai_key"] = convert_to_secret_str(
@ -314,3 +314,161 @@ class NeuralDBVectorStore(VectorStore):
path: path on disk to save the NeuralDB instance to.
"""
self.db.save(path)
class NeuralDBClientVectorStore(VectorStore):
    """Vectorstore that uses ThirdAI's NeuralDB Enterprise Python Client for NeuralDBs.

    To use, you should have the ``thirdai[neural_db]`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community.vectorstores import NeuralDBClientVectorStore
            from thirdai.neural_db import ModelBazaar, NeuralDBClient

            bazaar = ModelBazaar(base_url="http://{NEURAL_DB_ENTERPRISE_IP}/api/")
            bazaar.log_in(email="user@thirdai.com", password="1234")

            ndb_client = NeuralDBClient(
                deployment_identifier="user/model-0:user/deployment-0",
                base_url="http://{NEURAL_DB_ENTERPRISE_IP}/api/",
                bazaar=bazaar
            )
            vectorstore = NeuralDBClientVectorStore(db=ndb_client)
            retriever = vectorstore.as_retriever(search_kwargs={'k':5})

    """

    def __init__(self, db: Any) -> None:
        # Store the NeuralDBClient instance; all operations delegate to it.
        self.db = db

    # NeuralDB Client instance
    db: Any = None  #: :meta private:

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid
        underscore_attrs_are_private = True

    def similarity_search(
        self, query: str, k: int = 10, **kwargs: Any
    ) -> List[Document]:
        """Retrieve up to ``k`` contexts for a given query.

        Args:
            query: Query to submit to the model.
            k: The max number of context results to retrieve. Defaults to 10.

        Returns:
            A list of ``Document`` objects, one per retrieved reference, with
            the reference's id, source, metadata, score, and context stored in
            ``Document.metadata``.

        Raises:
            ValueError: If the underlying NeuralDB search fails for any reason.
        """
        try:
            references = self.db.search(query=query, top_k=k, **kwargs)["references"]
            return [
                Document(
                    page_content=ref["text"],
                    metadata={
                        "id": ref["id"],
                        "source": ref["source"],
                        "metadata": ref["metadata"],
                        # Fixed: previously copied ref["source"] here; the
                        # relevance score lives under the "score" key.
                        "score": ref["score"],
                        "context": ref["context"],
                    },
                )
                for ref in references
            ]
        except Exception as e:
            raise ValueError(f"Error while retrieving documents: {e}") from e

    def insert(self, documents: List[Dict[str, Any]]):  # type: ignore[no-untyped-def, no-untyped-def]
        """
        Inserts documents into the VectorStore and return the corresponding Sources.

        Args:
            documents (List[Dict[str, Any]]): A list of dictionaries that
            represent documents to be inserted to the VectorStores.
            The document dictionaries must be in the following format:
            {"document_type": "DOCUMENT_TYPE", **kwargs} where "DOCUMENT_TYPE"
            is one of the following:
            "PDF", "CSV", "DOCX", "URL", "SentenceLevelPDF", "SentenceLevelDOCX",
            "Unstructured", "InMemoryText".
            The kwargs for each document type are shown below:

            class PDF(Document):
                document_type: Literal["PDF"]
                path: str
                metadata: Optional[dict[str, Any]] = None
                on_disk: bool = False
                version: str = "v1"
                chunk_size: int = 100
                stride: int = 40
                emphasize_first_words: int = 0
                ignore_header_footer: bool = True
                ignore_nonstandard_orientation: bool = True

            class CSV(Document):
                document_type: Literal["CSV"]
                path: str
                id_column: Optional[str] = None
                strong_columns: Optional[List[str]] = None
                weak_columns: Optional[List[str]] = None
                reference_columns: Optional[List[str]] = None
                save_extra_info: bool = True
                metadata: Optional[dict[str, Any]] = None
                has_offset: bool = False
                on_disk: bool = False

            class DOCX(Document):
                document_type: Literal["DOCX"]
                path: str
                metadata: Optional[dict[str, Any]] = None
                on_disk: bool = False

            class URL(Document):
                document_type: Literal["URL"]
                url: str
                save_extra_info: bool = True
                title_is_strong: bool = False
                metadata: Optional[dict[str, Any]] = None
                on_disk: bool = False

            class SentenceLevelPDF(Document):
                document_type: Literal["SentenceLevelPDF"]
                path: str
                metadata: Optional[dict[str, Any]] = None
                on_disk: bool = False

            class SentenceLevelDOCX(Document):
                document_type: Literal["SentenceLevelDOCX"]
                path: str
                metadata: Optional[dict[str, Any]] = None
                on_disk: bool = False

            class Unstructured(Document):
                document_type: Literal["Unstructured"]
                path: str
                save_extra_info: bool = True
                metadata: Optional[dict[str, Any]] = None
                on_disk: bool = False

            class InMemoryText(Document):
                document_type: Literal["InMemoryText"]
                name: str
                texts: list[str]
                metadatas: Optional[list[dict[str, Any]]] = None
                global_metadata: Optional[dict[str, Any]] = None
                on_disk: bool = False

            For Document types with the arg "path", ensure that
            the path exists on your local machine.
        """
        # Delegate directly to the NeuralDB client; it returns the Sources.
        return self.db.insert(documents)

    def remove_documents(self, source_ids: List[str]):  # type: ignore[no-untyped-def]
        """
        Deletes documents from the VectorStore using source ids.

        Args:
            source_ids (List[str]): A list of source ids to delete
            from the VectorStore.
        """
        self.db.delete(source_ids)

@ -58,6 +58,7 @@ EXPECTED_ALL = [
"MyScale",
"MyScaleSettings",
"Neo4jVector",
"NeuralDBClientVectorStore",
"NeuralDBVectorStore",
"OpenSearchVectorSearch",
"OracleVS",

@ -93,6 +93,7 @@ _EXPECTED = [
"AzureCosmosDBVectorSearch",
"VectorStore",
"Yellowbrick",
"NeuralDBClientVectorStore",
"NeuralDBVectorStore",
"CouchbaseVectorStore",
]

@ -66,6 +66,7 @@ if TYPE_CHECKING:
MyScale,
MyScaleSettings,
Neo4jVector,
NeuralDBClientVectorStore,
NeuralDBVectorStore,
OpenSearchVectorSearch,
PGEmbedding,
@ -142,6 +143,7 @@ DEPRECATED_LOOKUP = {
"MyScale": "langchain_community.vectorstores",
"MyScaleSettings": "langchain_community.vectorstores",
"Neo4jVector": "langchain_community.vectorstores",
"NeuralDBClientVectorStore": "langchain_community.vectorstores",
"NeuralDBVectorStore": "langchain_community.vectorstores",
"NEuralDBVectorStore": "langchain_community.vectorstores",
"OpenSearchVectorSearch": "langchain_community.vectorstores",
@ -224,6 +226,7 @@ __all__ = [
"MyScale",
"MyScaleSettings",
"Neo4jVector",
"NeuralDBClientVectorStore",
"NeuralDBVectorStore",
"OpenSearchVectorSearch",
"PGEmbedding",

@ -42,6 +42,7 @@ _EXPECTED = [
"MyScale",
"MyScaleSettings",
"Neo4jVector",
"NeuralDBClientVectorStore",
"NeuralDBVectorStore",
"OpenSearchVectorSearch",
"PGEmbedding",

Loading…
Cancel
Save