mirror of https://github.com/hwchase17/langchain
Add String Distance and Embedding Evaluators (#7123)
Add a string evaluator and pairwise string evaluator implementation for:
- Embedding distance
- String distance

Update docs
parent fb6e63dc36
commit 4789c99bc2
@ -0,0 +1,12 @@
"""Evaluators that measure embedding distances."""
from langchain.evaluation.embedding_distance.base import (
    EmbeddingDistance,
    EmbeddingDistanceEvalChain,
    PairwiseEmbeddingDistanceEvalChain,
)

__all__ = [
    "EmbeddingDistance",
    "EmbeddingDistanceEvalChain",
    "PairwiseEmbeddingDistanceEvalChain",
]
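For orientation, a minimal usage sketch of these exports (not part of the commit). It assumes the chains' default OpenAIEmbeddings backend, so OPENAI_API_KEY must be set and network access is required; the printed value is illustrative.

# Minimal sketch: pick a metric via the enum and score two strings.
# Assumes the default OpenAIEmbeddings (OPENAI_API_KEY must be set).
from langchain.evaluation.embedding_distance import (
    EmbeddingDistance,
    PairwiseEmbeddingDistanceEvalChain,
)

chain = PairwiseEmbeddingDistanceEvalChain(
    distance_metric=EmbeddingDistance.EUCLIDEAN
)
result = chain.evaluate_string_pairs(prediction="Hello", prediction_b="Hi")
print(result["score"])  # a non-negative distance; 0.0 means identical embeddings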
@ -0,0 +1,438 @@
"""A chain for comparing the output of two models using embeddings."""
from enum import Enum
from typing import Any, Dict, List, Optional

import numpy as np
from pydantic import Field, root_validator

from langchain.callbacks.manager import (
    AsyncCallbackManagerForChainRun,
    CallbackManagerForChainRun,
    Callbacks,
)
from langchain.chains.base import Chain
from langchain.embeddings.base import Embeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.evaluation.schema import PairwiseStringEvaluator, StringEvaluator
from langchain.math_utils import cosine_similarity


class EmbeddingDistance(str, Enum):
    """Embedding Distance Metric.

    Attributes:
        COSINE: Cosine distance metric.
        EUCLIDEAN: Euclidean distance metric.
        MANHATTAN: Manhattan distance metric.
        CHEBYSHEV: Chebyshev distance metric.
        HAMMING: Hamming distance metric.
    """

    COSINE = "cosine"
    EUCLIDEAN = "euclidean"
    MANHATTAN = "manhattan"
    CHEBYSHEV = "chebyshev"
    HAMMING = "hamming"


class _EmbeddingDistanceChainMixin(Chain):
    """Shared functionality for embedding distance evaluators.

    Attributes:
        embeddings (Embeddings): The embedding objects to vectorize the outputs.
        distance_metric (EmbeddingDistance): The distance metric to use
            for comparing the embeddings.
    """

    embeddings: Embeddings = Field(default_factory=OpenAIEmbeddings)
    distance_metric: EmbeddingDistance = Field(default=EmbeddingDistance.COSINE)

    class Config:
        """Permit embeddings to go unvalidated."""

        arbitrary_types_allowed: bool = True

    @property
    def output_keys(self) -> List[str]:
        """Return the output keys of the chain.

        Returns:
            List[str]: The output keys.
        """
        return ["score"]

    @root_validator
    def _validate_distance_metric(cls, values: dict) -> dict:
        """Validate the distance metric.

        Args:
            values (dict): The values to validate.

        Returns:
            dict: The validated values.
        """
        values["distance_metric"] = values["distance_metric"].lower()
        return values

    def _get_metric(self, metric: EmbeddingDistance) -> Any:
        """Get the metric function for the given metric name.

        Args:
            metric (EmbeddingDistance): The metric name.

        Returns:
            Any: The metric function.
        """
        metrics = {
            EmbeddingDistance.COSINE: self._cosine_distance,
            EmbeddingDistance.EUCLIDEAN: self._euclidean_distance,
            EmbeddingDistance.MANHATTAN: self._manhattan_distance,
            EmbeddingDistance.CHEBYSHEV: self._chebyshev_distance,
            EmbeddingDistance.HAMMING: self._hamming_distance,
        }
        if metric in metrics:
            return metrics[metric]
        else:
            raise ValueError(f"Invalid metric: {metric}")

    @staticmethod
    def _cosine_distance(a: np.ndarray, b: np.ndarray) -> np.ndarray:
        """Compute the cosine distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.ndarray: The cosine distance.
        """
        return 1.0 - cosine_similarity(a, b)

    @staticmethod
    def _euclidean_distance(a: np.ndarray, b: np.ndarray) -> np.floating:
        """Compute the Euclidean distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Euclidean distance.
        """
        return np.linalg.norm(a - b)

    @staticmethod
    def _manhattan_distance(a: np.ndarray, b: np.ndarray) -> np.floating:
        """Compute the Manhattan distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Manhattan distance.
        """
        return np.sum(np.abs(a - b))

    @staticmethod
    def _chebyshev_distance(a: np.ndarray, b: np.ndarray) -> np.floating:
        """Compute the Chebyshev distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Chebyshev distance.
        """
        return np.max(np.abs(a - b))

    @staticmethod
    def _hamming_distance(a: np.ndarray, b: np.ndarray) -> np.floating:
        """Compute the Hamming distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Hamming distance.
        """
        return np.mean(a != b)

    def _compute_score(self, vectors: np.ndarray) -> float:
        """Compute the score based on the distance metric.

        Args:
            vectors (np.ndarray): The input vectors.

        Returns:
            float: The computed score.
        """
        metric = self._get_metric(self.distance_metric)
        score = metric(vectors[0].reshape(1, -1), vectors[1].reshape(1, -1)).item()
        return score


class EmbeddingDistanceEvalChain(_EmbeddingDistanceChainMixin, StringEvaluator):
    """Use embedding distances to score semantic difference between
    a prediction and reference.

    Examples:
        >>> chain = EmbeddingDistanceEvalChain()
        >>> result = chain.evaluate_strings(prediction="Hello", reference="Hi")
        >>> print(result)
        {'score': 0.5}
    """

    @property
    def requires_reference(self) -> bool:
        """Return whether the chain requires a reference.

        Returns:
            bool: True if a reference is required, False otherwise.
        """
        return True

    @property
    def input_keys(self) -> List[str]:
        """Return the input keys of the chain.

        Returns:
            List[str]: The input keys.
        """
        return ["prediction", "reference"]

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        """Compute the score for a prediction and reference.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (Optional[CallbackManagerForChainRun], optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        """
        # Embed the prediction and the reference so they can be compared.
        vectors = np.array(
            self.embeddings.embed_documents(
                [inputs["prediction"], inputs["reference"]]
            )
        )
        score = self._compute_score(vectors)
        return {"score": score}

    async def _acall(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[AsyncCallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        """Asynchronously compute the score for a prediction and reference.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (AsyncCallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        """
        embedded = await self.embeddings.aembed_documents(
            [inputs["prediction"], inputs["reference"]]
        )
        vectors = np.array(embedded)
        score = self._compute_score(vectors)
        return {"score": score}

    def _evaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        callbacks: Callbacks = None,
        **kwargs: Any,
    ) -> dict:
        """Evaluate the embedding distance between a prediction and
        reference.

        Args:
            prediction (str): The output string from the first model.
            reference (str): The reference string (required).
            callbacks (Callbacks, optional): The callbacks to use.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the prediction
                    and the reference.
        """
        result = self(
            inputs={"prediction": prediction, "reference": reference},
            callbacks=callbacks,
        )
        return {"score": result["score"]}

    async def _aevaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        callbacks: Callbacks = None,
        **kwargs: Any,
    ) -> dict:
        """Asynchronously evaluate the embedding distance between
        a prediction and reference.

        Args:
            prediction (str): The output string from the first model.
            reference (str): The reference string (required).
            callbacks (Callbacks, optional): The callbacks to use.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the prediction
                    and the reference.
        """
        result = await self.acall(
            inputs={"prediction": prediction, "reference": reference},
            callbacks=callbacks,
        )
        return {"score": result["score"]}


class PairwiseEmbeddingDistanceEvalChain(
    _EmbeddingDistanceChainMixin, PairwiseStringEvaluator
):
    """Use embedding distances to score semantic difference between two predictions.

    Examples:
        >>> chain = PairwiseEmbeddingDistanceEvalChain()
        >>> result = chain.evaluate_string_pairs(prediction="Hello", prediction_b="Hi")
        >>> print(result)
        {'score': 0.5}
    """

    @property
    def input_keys(self) -> List[str]:
        """Return the input keys of the chain.

        Returns:
            List[str]: The input keys.
        """
        return ["prediction", "prediction_b"]

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        """Compute the score for two predictions.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (CallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        """
        vectors = np.array(
            self.embeddings.embed_documents(
                [inputs["prediction"], inputs["prediction_b"]]
            )
        )
        score = self._compute_score(vectors)
        return {"score": score}

    async def _acall(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[AsyncCallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        """Asynchronously compute the score for two predictions.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (AsyncCallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        """
        embedded = await self.embeddings.aembed_documents(
            [inputs["prediction"], inputs["prediction_b"]]
        )
        vectors = np.array(embedded)
        score = self._compute_score(vectors)
        return {"score": score}

    def _evaluate_string_pairs(
        self,
        *,
        prediction: str,
        prediction_b: str,
        callbacks: Callbacks = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> dict:
        """Evaluate the embedding distance between two predictions.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            tags (List[str], optional): Tags to apply to traces.
            metadata (Dict[str, Any], optional): Metadata to apply to traces.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        """
        result = self(
            inputs={"prediction": prediction, "prediction_b": prediction_b},
            callbacks=callbacks,
            tags=tags,
            metadata=metadata,
        )
        return {"score": result["score"]}

    async def _aevaluate_string_pairs(
        self,
        *,
        prediction: str,
        prediction_b: str,
        callbacks: Callbacks = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> dict:
        """Asynchronously evaluate the embedding distance between
        two predictions.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            tags (List[str], optional): Tags to apply to traces.
            metadata (Dict[str, Any], optional): Metadata to apply to traces.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        """
        result = await self.acall(
            inputs={"prediction": prediction, "prediction_b": prediction_b},
            callbacks=callbacks,
            tags=tags,
            metadata=metadata,
        )
        return {"score": result["score"]}
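To exercise EmbeddingDistanceEvalChain without network calls, a deterministic Embeddings implementation can be injected through the `embeddings` field. This is a sketch only: FakeCharEmbeddings below is a hypothetical helper written for illustration, not part of LangChain.

# Sketch only: a hypothetical, deterministic Embeddings stub so the chain
# can run offline. Real usage would pass a production Embeddings instance.
from typing import List

from langchain.embeddings.base import Embeddings
from langchain.evaluation.embedding_distance import EmbeddingDistanceEvalChain


class FakeCharEmbeddings(Embeddings):
    """Embed a string as [length, vowel count] (toy example)."""

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        return [self._embed(t) for t in texts]

    def embed_query(self, text: str) -> List[float]:
        return self._embed(text)

    @staticmethod
    def _embed(text: str) -> List[float]:
        return [float(len(text)), float(sum(c in "aeiou" for c in text))]


chain = EmbeddingDistanceEvalChain(embeddings=FakeCharEmbeddings())
print(chain.evaluate_strings(prediction="hello", reference="hello")["score"])  # 0.0
print(chain.evaluate_strings(prediction="hello", reference="hi")["score"])  # > 0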
@ -0,0 +1,12 @@
"""String distance evaluators."""
from langchain.evaluation.string_distance.base import (
    PairwiseStringDistanceEvalChain,
    StringDistance,
    StringDistanceEvalChain,
)

__all__ = [
    "PairwiseStringDistanceEvalChain",
    "StringDistance",
    "StringDistanceEvalChain",
]
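A minimal usage sketch of these exports (illustrative; requires the rapidfuzz package):

# Minimal sketch (requires `pip install rapidfuzz`).
from langchain.evaluation.string_distance import (
    StringDistance,
    StringDistanceEvalChain,
)

chain = StringDistanceEvalChain(distance=StringDistance.LEVENSHTEIN)
result = chain.evaluate_strings(prediction="The cat", reference="The bat")
print(result["score"])  # 1: one substitution separates the two strings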
@ -0,0 +1,376 @@
"""String distance evaluators based on the RapidFuzz library."""

from enum import Enum
from typing import Any, Callable, Dict, List, Optional

from pydantic import Field, root_validator

from langchain.callbacks.manager import (
    AsyncCallbackManagerForChainRun,
    CallbackManagerForChainRun,
    Callbacks,
)
from langchain.chains.base import Chain
from langchain.evaluation.schema import PairwiseStringEvaluator, StringEvaluator


def _load_rapidfuzz() -> Any:
    """
    Load the RapidFuzz library.

    Raises:
        ImportError: If the rapidfuzz library is not installed.

    Returns:
        Any: The rapidfuzz.distance module.
    """
    try:
        import rapidfuzz
    except ImportError:
        raise ImportError(
            "Please install the rapidfuzz library to use the string "
            "distance evaluators."
        )
    return rapidfuzz.distance

class StringDistance(str, Enum):
    """Distance metric to use."""

    DAMERAU_LEVENSHTEIN = "damerau_levenshtein"
    LEVENSHTEIN = "levenshtein"
    JARO = "jaro"
    JARO_WINKLER = "jaro_winkler"


class _RapidFuzzChainMixin(Chain):
    """Shared methods for the rapidfuzz string distance evaluators."""

    distance: StringDistance = Field(default=StringDistance.LEVENSHTEIN)

    @root_validator
    def validate_dependencies(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """
        Validate that the rapidfuzz library is installed.

        Args:
            values (Dict[str, Any]): The input values.

        Returns:
            Dict[str, Any]: The validated values.
        """
        _load_rapidfuzz()
        return values

    @property
    def output_keys(self) -> List[str]:
        """
        Get the output keys.

        Returns:
            List[str]: The output keys.
        """
        return ["score"]

    @staticmethod
    def _get_metric(distance: str) -> Callable:
        """
        Get the distance metric function based on the distance type.

        Args:
            distance (str): The distance type.

        Returns:
            Callable: The distance metric function.

        Raises:
            ValueError: If the distance metric is invalid.
        """
        rf_distance = _load_rapidfuzz()
        if distance == StringDistance.DAMERAU_LEVENSHTEIN:
            return rf_distance.DamerauLevenshtein.distance
        elif distance == StringDistance.LEVENSHTEIN:
            return rf_distance.Levenshtein.distance
        elif distance == StringDistance.JARO:
            return rf_distance.Jaro.distance
        elif distance == StringDistance.JARO_WINKLER:
            return rf_distance.JaroWinkler.distance
        else:
            raise ValueError(f"Invalid distance metric: {distance}")

    @property
    def metric(self) -> Callable:
        """
        Get the distance metric function.

        Returns:
            Callable: The distance metric function.
        """
        return _RapidFuzzChainMixin._get_metric(self.distance)


class StringDistanceEvalChain(_RapidFuzzChainMixin, StringEvaluator):
    """Compute string distances between the prediction and the reference."""

    @property
    def requires_input(self) -> bool:
        """
        Check if input is required.

        Returns:
            bool: True if input is required, False otherwise.
        """
        return False

    @property
    def requires_reference(self) -> bool:
        """
        Check if reference is required.

        Returns:
            bool: True if reference is required, False otherwise.
        """
        return True

    @property
    def input_keys(self) -> List[str]:
        """
        Get the input keys.

        Returns:
            List[str]: The input keys.
        """
        return ["reference", "prediction"]

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        """
        Compute the string distance between the prediction and the reference.

        Args:
            inputs (Dict[str, Any]): The input values.
            run_manager (Optional[CallbackManagerForChainRun]):
                The callback manager.

        Returns:
            Dict[str, Any]: The evaluation results containing the score.
        """
        return {"score": self.metric(inputs["reference"], inputs["prediction"])}

    async def _acall(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[AsyncCallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        """
        Asynchronously compute the string distance between the prediction
        and the reference.

        Args:
            inputs (Dict[str, Any]): The input values.
            run_manager (Optional[AsyncCallbackManagerForChainRun]):
                The callback manager.

        Returns:
            Dict[str, Any]: The evaluation results containing the score.
        """
        return {"score": self.metric(inputs["reference"], inputs["prediction"])}

    def _evaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        callbacks: Callbacks = None,
        **kwargs: Any,
    ) -> dict:
        """
        Evaluate the string distance between the prediction and the reference.

        Args:
            prediction (str): The prediction string.
            reference (Optional[str], optional): The reference string.
            input (Optional[str], optional): The input string.
            callbacks (Callbacks, optional): The callbacks to use.
            **kwargs: Additional keyword arguments.

        Returns:
            dict: The evaluation results containing the score.
        """
        result = self(
            inputs={"prediction": prediction, "reference": reference},
            callbacks=callbacks,
        )
        return {"score": result["score"]}

    async def _aevaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        callbacks: Callbacks = None,
        **kwargs: Any,
    ) -> dict:
        """
        Asynchronously evaluate the string distance between the
        prediction and the reference.

        Args:
            prediction (str): The prediction string.
            reference (Optional[str], optional): The reference string.
            input (Optional[str], optional): The input string.
            callbacks (Callbacks, optional): The callbacks to use.
            **kwargs: Additional keyword arguments.

        Returns:
            dict: The evaluation results containing the score.
        """
        result = await self.acall(
            inputs={"prediction": prediction, "reference": reference},
            callbacks=callbacks,
        )
        return {"score": result["score"]}


class PairwiseStringDistanceEvalChain(_RapidFuzzChainMixin, PairwiseStringEvaluator):
    """Compute string edit distances between two predictions."""

    @property
    def input_keys(self) -> List[str]:
        """
        Get the input keys.

        Returns:
            List[str]: The input keys.
        """
        return ["prediction", "prediction_b"]

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        """
        Compute the string distance between two predictions.

        Args:
            inputs (Dict[str, Any]): The input values.
            run_manager (CallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The evaluation results containing the score.
        """
        return {"score": self.metric(inputs["prediction"], inputs["prediction_b"])}

    async def _acall(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[AsyncCallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        """
        Asynchronously compute the string distance between two predictions.

        Args:
            inputs (Dict[str, Any]): The input values.
            run_manager (AsyncCallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The evaluation results containing the score.
        """
        return {"score": self.metric(inputs["prediction"], inputs["prediction_b"])}

    def _evaluate_string_pairs(
        self,
        *,
        prediction: str,
        prediction_b: str,
        callbacks: Callbacks = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> dict:
        """
        Evaluate the string distance between two predictions.

        Args:
            prediction (str): The first prediction string.
            prediction_b (str): The second prediction string.
            callbacks (Callbacks, optional): The callbacks to use.
            tags (List[str], optional): Tags to apply to traces.
            metadata (Dict[str, Any], optional): Metadata to apply to traces.
            **kwargs: Additional keyword arguments.

        Returns:
            dict: The evaluation results containing the score.
        """
        result = self(
            inputs={"prediction": prediction, "prediction_b": prediction_b},
            callbacks=callbacks,
            tags=tags,
            metadata=metadata,
        )
        return {"score": result["score"]}

    async def _aevaluate_string_pairs(
        self,
        *,
        prediction: str,
        prediction_b: str,
        callbacks: Callbacks = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> dict:
        """
        Asynchronously evaluate the string distance between two predictions.

        Args:
            prediction (str): The first prediction string.
            prediction_b (str): The second prediction string.
            callbacks (Callbacks, optional): The callbacks to use.
            tags (List[str], optional): Tags to apply to traces.
            metadata (Dict[str, Any], optional): Metadata to apply to traces.
            **kwargs: Additional keyword arguments.

        Returns:
            dict: The evaluation results containing the score.
        """
        result = await self.acall(
            inputs={"prediction": prediction, "prediction_b": prediction_b},
            callbacks=callbacks,
            tags=tags,
            metadata=metadata,
        )
        return {"score": result["score"]}
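A usage sketch for the pairwise variant (illustrative, requires rapidfuzz). Note the raw scores live on different scales: Levenshtein and Damerau-Levenshtein return integer edit counts, while the Jaro family returns values in [0, 1].

# Pairwise usage sketch. Levenshtein scores are edit counts; Jaro and
# Jaro-Winkler scores fall in [0, 1].
from langchain.evaluation.string_distance import (
    PairwiseStringDistanceEvalChain,
    StringDistance,
)

lev_chain = PairwiseStringDistanceEvalChain(distance=StringDistance.LEVENSHTEIN)
jaro_chain = PairwiseStringDistanceEvalChain(distance=StringDistance.JARO)

pair = {"prediction": "kitten", "prediction_b": "sitting"}
print(lev_chain.evaluate_string_pairs(**pair)["score"])  # 3 edits
print(jaro_chain.evaluate_string_pairs(**pair)["score"])  # a float in [0, 1]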
@ -0,0 +1,123 @@
from typing import Tuple

import numpy as np
import pytest

from langchain.evaluation.embedding_distance import (
    EmbeddingDistance,
    PairwiseEmbeddingDistanceEvalChain,
)


@pytest.fixture
def vectors() -> Tuple[np.ndarray, np.ndarray]:
    """Create two random vectors."""
    vector_a = np.array(
        [
            0.5488135,
            0.71518937,
            0.60276338,
            0.54488318,
            0.4236548,
            0.64589411,
            0.43758721,
            0.891773,
            0.96366276,
            0.38344152,
        ]
    )
    vector_b = np.array(
        [
            0.79172504,
            0.52889492,
            0.56804456,
            0.92559664,
            0.07103606,
            0.0871293,
            0.0202184,
            0.83261985,
            0.77815675,
            0.87001215,
        ]
    )
    return vector_a, vector_b


@pytest.fixture
def chain() -> PairwiseEmbeddingDistanceEvalChain:
    """Create a PairwiseEmbeddingDistanceEvalChain."""
    return PairwiseEmbeddingDistanceEvalChain()


@pytest.mark.requires("scipy")
def test_cosine_similarity(
    chain: PairwiseEmbeddingDistanceEvalChain, vectors: Tuple[np.ndarray, np.ndarray]
) -> None:
    """Test the cosine similarity."""
    chain.distance_metric = EmbeddingDistance.COSINE
    result = chain._compute_score(np.array(vectors))
    expected = 1.0 - np.dot(vectors[0], vectors[1]) / (
        np.linalg.norm(vectors[0]) * np.linalg.norm(vectors[1])
    )
    assert np.isclose(result, expected)


@pytest.mark.requires("scipy")
def test_euclidean_distance(
    chain: PairwiseEmbeddingDistanceEvalChain, vectors: Tuple[np.ndarray, np.ndarray]
) -> None:
    """Test the euclidean distance."""
    from scipy.spatial.distance import euclidean

    chain.distance_metric = EmbeddingDistance.EUCLIDEAN
    result = chain._compute_score(np.array(vectors))
    expected = euclidean(*vectors)
    assert np.isclose(result, expected)


@pytest.mark.requires("scipy")
def test_manhattan_distance(
    chain: PairwiseEmbeddingDistanceEvalChain, vectors: Tuple[np.ndarray, np.ndarray]
) -> None:
    """Test the manhattan distance."""
    from scipy.spatial.distance import cityblock

    chain.distance_metric = EmbeddingDistance.MANHATTAN
    result = chain._compute_score(np.array(vectors))
    expected = cityblock(*vectors)
    assert np.isclose(result, expected)


@pytest.mark.requires("scipy")
def test_chebyshev_distance(
    chain: PairwiseEmbeddingDistanceEvalChain, vectors: Tuple[np.ndarray, np.ndarray]
) -> None:
    """Test the chebyshev distance."""
    from scipy.spatial.distance import chebyshev

    chain.distance_metric = EmbeddingDistance.CHEBYSHEV
    result = chain._compute_score(np.array(vectors))
    expected = chebyshev(*vectors)
    assert np.isclose(result, expected)


@pytest.mark.requires("scipy")
def test_hamming_distance(
    chain: PairwiseEmbeddingDistanceEvalChain, vectors: Tuple[np.ndarray, np.ndarray]
) -> None:
    """Test the hamming distance."""
    from scipy.spatial.distance import hamming

    chain.distance_metric = EmbeddingDistance.HAMMING
    result = chain._compute_score(np.array(vectors))
    expected = hamming(*vectors)
    assert np.isclose(result, expected)


@pytest.mark.requires("openai", "tiktoken")
def test_embedding_distance(chain: PairwiseEmbeddingDistanceEvalChain) -> None:
    """Test the embedding distance."""
    result = chain.evaluate_string_pairs(
        prediction="A single cat", prediction_b="A single cat"
    )
    assert np.isclose(result["score"], 0.0)
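The cosine test above checks that the chain's score equals 1 minus the cosine similarity computed by hand. As a concrete worked example of that invariant, independent of the chain:

# Worked example of the cosine-distance invariant checked above:
# score = 1 - (a . b) / (|a| * |b|).
import numpy as np

a = np.array([1.0, 0.0, 1.0])
b = np.array([0.0, 1.0, 1.0])
cosine_sim = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
print(1.0 - cosine_sim)  # 0.5: the dot product is 1 and each norm is sqrt(2)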
@ -0,0 +1,51 @@
import pytest

from langchain.evaluation.string_distance import (
    PairwiseStringDistanceEvalChain,
    StringDistance,
    StringDistanceEvalChain,
)


@pytest.mark.requires("rapidfuzz")
@pytest.mark.parametrize("distance", list(StringDistance))
def test_zero_distance(distance: StringDistance) -> None:
    eval_chain = StringDistanceEvalChain(distance=distance)
    string = "三人行则必有我师"
    result = eval_chain.evaluate_strings(prediction=string, reference=string)
    assert "score" in result
    assert result["score"] == 0


@pytest.mark.asyncio
@pytest.mark.requires("rapidfuzz")
@pytest.mark.parametrize("distance", list(StringDistance))
async def test_zero_distance_async(distance: StringDistance) -> None:
    eval_chain = StringDistanceEvalChain(distance=distance)
    string = "三人行则必有我师"
    result = await eval_chain.aevaluate_strings(prediction=string, reference=string)
    assert "score" in result
    assert result["score"] == 0


@pytest.mark.requires("rapidfuzz")
@pytest.mark.parametrize("distance", list(StringDistance))
def test_zero_distance_pairwise(distance: StringDistance) -> None:
    eval_chain = PairwiseStringDistanceEvalChain(distance=distance)
    string = "三人行则必有我师"
    result = eval_chain.evaluate_string_pairs(prediction=string, prediction_b=string)
    assert "score" in result
    assert result["score"] == 0


@pytest.mark.asyncio
@pytest.mark.requires("rapidfuzz")
@pytest.mark.parametrize("distance", list(StringDistance))
async def test_zero_distance_pairwise_async(distance: StringDistance) -> None:
    eval_chain = PairwiseStringDistanceEvalChain(distance=distance)
    string = "三人行则必有我师"
    result = await eval_chain.aevaluate_string_pairs(
        prediction=string, prediction_b=string
    )
    assert "score" in result
    assert result["score"] == 0
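The tests above only assert the zero-distance case (identical strings score 0 under every metric). A quick sanity check with non-identical strings, using rapidfuzz directly, shows how the raw scores behave:

# Sanity check beyond the zero-distance cases (requires rapidfuzz).
from rapidfuzz.distance import Jaro, Levenshtein

print(Levenshtein.distance("flaw", "lawn"))  # 2: one deletion plus one insertion
print(Jaro.distance("abc", "xyz"))  # 1.0: no characters match at all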