acho98 2 weeks ago committed by GitHub
commit 773e2e27a8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -0,0 +1,71 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Clova Embeddings\n",
"[Clova](https://api.ncloud-docs.com/docs/ai-naver-clovastudio-summary) offers an embeddings service.\n",
"\n",
"This example goes over how to use LangChain to interact with Clova inference for text embedding.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"os.environ[\"CLOVA_EMB_API_KEY\"] = \"\"\n",
"os.environ[\"CLOVA_EMB_APIGW_API_KEY\"] = \"\"\n",
"os.environ[\"CLOVA_EMB_APP_ID\"] =\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.embeddings import ClovaEmbeddings"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"embeddings = ClovaEmbeddings()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"query_text = \"This is a test query.\"\n",
"query_result = embeddings.embed_query(query_text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"document_text = [\"This is a test doc1.\", \"This is a test doc2.\"]\n",
"document_result = embeddings.embed_documents(document_text)"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

@ -43,6 +43,9 @@ if TYPE_CHECKING:
from langchain_community.embeddings.clarifai import (
ClarifaiEmbeddings,
)
from langchain_community.embeddings.clova import (
ClovaEmbeddings,
)
from langchain_community.embeddings.cohere import (
CohereEmbeddings,
)
@ -229,6 +232,7 @@ __all__ = [
"BedrockEmbeddings",
"BookendEmbeddings",
"ClarifaiEmbeddings",
"ClovaEmbeddings",
"CohereEmbeddings",
"DashScopeEmbeddings",
"DatabricksEmbeddings",
@ -305,6 +309,7 @@ _module_lookup = {
"BedrockEmbeddings": "langchain_community.embeddings.bedrock",
"BookendEmbeddings": "langchain_community.embeddings.bookend",
"ClarifaiEmbeddings": "langchain_community.embeddings.clarifai",
"ClovaEmbeddings": "langchain_community.embeddings.clova",
"CohereEmbeddings": "langchain_community.embeddings.cohere",
"DashScopeEmbeddings": "langchain_community.embeddings.dashscope",
"DatabricksEmbeddings": "langchain_community.embeddings.databricks",

@ -0,0 +1,127 @@
from __future__ import annotations
from typing import Dict, List, Optional
import requests
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Extra, SecretStr, root_validator
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env
class ClovaEmbeddings(BaseModel, Embeddings):
    """
    Clova's embedding service.

    To use this service, you should have the following environment variables
    set with your API tokens and application ID, or pass them as named
    parameters to the constructor:

    - ``CLOVA_EMB_API_KEY``: API key for accessing Clova's embedding service.
    - ``CLOVA_EMB_APIGW_API_KEY``: API gateway key for enhanced security.
    - ``CLOVA_EMB_APP_ID``: Application ID for identifying your application.

    Example:
        .. code-block:: python

            from langchain_community.embeddings import ClovaEmbeddings

            embeddings = ClovaEmbeddings(
                clova_emb_api_key='your_clova_emb_api_key',
                clova_emb_apigw_api_key='your_clova_emb_apigw_api_key',
                app_id='your_app_id'
            )

            query_text = "This is a test query."
            query_result = embeddings.embed_query(query_text)

            document_text = "This is a test document."
            document_result = embeddings.embed_documents([document_text])
    """

    endpoint_url: str = (
        "https://clovastudio.apigw.ntruss.com/testapp/v1/api-tools/embedding"
    )
    """Endpoint URL to use."""
    model: str = "clir-emb-dolphin"
    """Embedding model name to use."""
    clova_emb_api_key: Optional[SecretStr] = None
    """API key, sent in the ``X-NCP-CLOVASTUDIO-API-KEY`` header."""
    clova_emb_apigw_api_key: Optional[SecretStr] = None
    """API gateway key, sent in the ``X-NCP-APIGW-API-KEY`` header."""
    app_id: Optional[SecretStr] = None
    """Application ID, appended as the last segment of the request URL."""
    timeout: Optional[float] = 60
    """Seconds to wait for the HTTP request; ``None`` waits indefinitely."""

    class Config:
        extra = Extra.forbid

    @root_validator(pre=True, allow_reuse=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Fill the API key, gateway key and app ID from ``values`` or the
        environment and wrap each one as a secret string."""
        values["clova_emb_api_key"] = convert_to_secret_str(
            get_from_dict_or_env(values, "clova_emb_api_key", "CLOVA_EMB_API_KEY")
        )
        values["clova_emb_apigw_api_key"] = convert_to_secret_str(
            get_from_dict_or_env(
                values, "clova_emb_apigw_api_key", "CLOVA_EMB_APIGW_API_KEY"
            )
        )
        values["app_id"] = convert_to_secret_str(
            get_from_dict_or_env(values, "app_id", "CLOVA_EMB_APP_ID")
        )
        return values

    def embed(self, texts: List[str]) -> List[List[float]]:
        """Alias for :meth:`embed_documents`."""
        return self.embed_documents(texts)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """
        Embed a list of texts and return their embeddings.

        One API request is issued per text, in order.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        return [self._embed_text(text) for text in texts]

    def embed_query(self, text: str) -> List[float]:
        """
        Embed a single query text and return its embedding.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        return self._embed_text(text)

    def _embed_text(self, text: str) -> List[float]:
        """
        Internal method to call the embedding API and handle the response.

        Args:
            text: The text to embed.

        Returns:
            The value found at ``result.embedding`` in the JSON response.

        Raises:
            ValueError: If the response status is not 200, or the response
                body has no ``result.embedding`` entry.
        """
        payload = {"text": text}

        # HTTP headers for authorization
        headers = {
            "X-NCP-CLOVASTUDIO-API-KEY": self.clova_emb_api_key.get_secret_value(),
            "X-NCP-APIGW-API-KEY": self.clova_emb_apigw_api_key.get_secret_value(),
            "Content-Type": "application/json",
        }

        # Always pass a timeout: without one, requests waits forever on a
        # stalled connection and the caller never regains control.
        response = requests.post(
            f"{self.endpoint_url}/{self.model}/{self.app_id.get_secret_value()}",
            headers=headers,
            json=payload,
            timeout=self.timeout,
        )

        # check for errors
        if response.status_code == 200:
            response_data = response.json()
            if "result" in response_data and "embedding" in response_data["result"]:
                return response_data["result"]["embedding"]

        # Reached for non-200 responses and for 200 responses lacking the
        # expected embedding payload.
        raise ValueError(
            f"API request failed with status {response.status_code}: {response.text}"
        )
Loading…
Cancel
Save