langchain/docs/docs/how_to/example_selectors_mmr.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "bc35afd0",
   "metadata": {},
   "source": [
    "# How to select examples by maximal marginal relevance (MMR)\n",
    "\n",
    "The `MaxMarginalRelevanceExampleSelector` selects examples based on a combination of which examples are most similar to the inputs, while also optimizing for diversity. It does this by finding the examples with the embeddings that have the greatest cosine similarity with the inputs, and then iteratively adding them while penalizing them for closeness to already selected examples.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "ac95c968",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.prompts import FewShotPromptTemplate, PromptTemplate\n",
    "from langchain.prompts.example_selector import (\n",
    "    MaxMarginalRelevanceExampleSelector,\n",
    "    SemanticSimilarityExampleSelector,\n",
    ")\n",
    "from langchain_community.vectorstores import FAISS\n",
    "from langchain_openai import OpenAIEmbeddings\n",
    "\n",
    "example_prompt = PromptTemplate(\n",
    "    input_variables=[\"input\", \"output\"],\n",
    "    template=\"Input: {input}\\nOutput: {output}\",\n",
    ")\n",
    "\n",
    "# Examples of a pretend task of creating antonyms.\n",
    "examples = [\n",
    "    {\"input\": \"happy\", \"output\": \"sad\"},\n",
    "    {\"input\": \"tall\", \"output\": \"short\"},\n",
    "    {\"input\": \"energetic\", \"output\": \"lethargic\"},\n",
    "    {\"input\": \"sunny\", \"output\": \"gloomy\"},\n",
    "    {\"input\": \"windy\", \"output\": \"calm\"},\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "db579bea",
   "metadata": {},
   "outputs": [],
   "source": [
    "example_selector = MaxMarginalRelevanceExampleSelector.from_examples(\n",
    "    # The list of examples available to select from.\n",
    "    examples,\n",
    "    # The embedding class used to produce embeddings which are used to measure semantic similarity.\n",
    "    OpenAIEmbeddings(),\n",
    "    # The VectorStore class that is used to store the embeddings and do a similarity search over.\n",
    "    FAISS,\n",
    "    # The number of examples to produce.\n",
    "    k=2,\n",
    ")\n",
    "mmr_prompt = FewShotPromptTemplate(\n",
    "    # We provide an ExampleSelector instead of examples.\n",
    "    example_selector=example_selector,\n",
    "    example_prompt=example_prompt,\n",
    "    prefix=\"Give the antonym of every input\",\n",
    "    suffix=\"Input: {adjective}\\nOutput:\",\n",
    "    input_variables=[\"adjective\"],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "cd76e344",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Give the antonym of every input\n",
      "\n",
      "Input: happy\n",
      "Output: sad\n",
      "\n",
      "Input: windy\n",
      "Output: calm\n",
      "\n",
      "Input: worried\n",
      "Output:\n"
     ]
    }
   ],
   "source": [
    "# Input is a feeling, so should select the happy/sad example as the first one\n",
    "print(mmr_prompt.format(adjective=\"worried\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "cf82956b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Give the antonym of every input\n",
      "\n",
      "Input: happy\n",
      "Output: sad\n",
      "\n",
      "Input: sunny\n",
      "Output: gloomy\n",
      "\n",
      "Input: worried\n",
      "Output:\n"
     ]
    }
   ],
   "source": [
    "# Let's compare this to what we would just get if we went solely off of similarity,\n",
    "# by using SemanticSimilarityExampleSelector instead of MaxMarginalRelevanceExampleSelector.\n",
    "example_selector = SemanticSimilarityExampleSelector.from_examples(\n",
    "    # The list of examples available to select from.\n",
    "    examples,\n",
    "    # The embedding class used to produce embeddings which are used to measure semantic similarity.\n",
    "    OpenAIEmbeddings(),\n",
    "    # The VectorStore class that is used to store the embeddings and do a similarity search over.\n",
    "    FAISS,\n",
    "    # The number of examples to produce.\n",
    "    k=2,\n",
    ")\n",
    "similar_prompt = FewShotPromptTemplate(\n",
    "    # We provide an ExampleSelector instead of examples.\n",
    "    example_selector=example_selector,\n",
    "    example_prompt=example_prompt,\n",
    "    prefix=\"Give the antonym of every input\",\n",
    "    suffix=\"Input: {adjective}\\nOutput:\",\n",
    "    input_variables=[\"adjective\"],\n",
    ")\n",
    "print(similar_prompt.format(adjective=\"worried\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "39f30097",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}