{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# RAG using Upstage Layout Analysis and Groundedness Check\n",
    "This example illustrates RAG using [Upstage](https://python.langchain.com/docs/integrations/providers/upstage/) Layout Analysis and Groundedness Check."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import List\n",
    "\n",
    "from langchain_community.vectorstores import DocArrayInMemorySearch\n",
    "from langchain_core.output_parsers import StrOutputParser\n",
    "from langchain_core.prompts import ChatPromptTemplate\n",
    "from langchain_core.runnables import RunnablePassthrough\n",
    "from langchain_core.runnables.base import RunnableSerializable\n",
    "from langchain_upstage import (\n",
    "    ChatUpstage,\n",
    "    UpstageEmbeddings,\n",
    "    UpstageGroundednessCheck,\n",
    "    UpstageLayoutAnalysisLoader,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration\n",
    "Everything a reader may want to change lives in the next cell: the input files, the question, and the maximum number of generate-and-check rounds."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Chat model used to generate the answer.\n",
    "model = ChatUpstage()\n",
    "\n",
    "# Documents to index; replace the placeholders with real PDF paths.\n",
    "files = [\"/PATH/TO/YOUR/FILE.pdf\", \"/PATH/TO/YOUR/FILE2.pdf\"]\n",
    "\n",
    "# Single source of truth for the question, so retrieval and the prompt\n",
    "# can never drift apart.\n",
    "QUESTION = \"How many parameters in SOLAR model?\"\n",
    "\n",
    "# Upper bound on generate-and-check rounds so the notebook always terminates.\n",
    "MAX_ATTEMPTS = 3"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the documents and build the retriever"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "loader = UpstageLayoutAnalysisLoader(file_path=files, split=\"element\")\n",
    "docs = loader.load()\n",
    "\n",
    "# Embed the loaded documents into an in-memory vector store and expose it\n",
    "# as a retriever.\n",
    "vectorstore = DocArrayInMemorySearch.from_documents(docs, embedding=UpstageEmbeddings())\n",
    "retriever = vectorstore.as_retriever()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build the prompt chain and retrieve the context"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "template = \"\"\"Answer the question based only on the following context:\n",
    "{context}\n",
    "\n",
    "Question: {question}\n",
    "\"\"\"\n",
    "prompt = ChatPromptTemplate.from_template(template)\n",
    "output_parser = StrOutputParser()\n",
    "\n",
    "# Built once: the chain is identical for every attempt, so there is no\n",
    "# reason to reconstruct it inside the retry loop.\n",
    "chain: RunnableSerializable = RunnablePassthrough() | prompt | model | output_parser\n",
    "\n",
    "# Retrievers are Runnables; `invoke` replaces the deprecated\n",
    "# `get_relevant_documents` call.\n",
    "retrieved_docs = retriever.invoke(QUESTION)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generate an answer until it is grounded\n",
    "The answer is regenerated until the groundedness check returns `grounded`, for at most `MAX_ATTEMPTS` rounds."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "groundedness_check = UpstageGroundednessCheck()\n",
    "groundedness = \"\"\n",
    "result = \"\"\n",
    "\n",
    "# Stop as soon as the check reports \"grounded\"; give up after MAX_ATTEMPTS\n",
    "# rounds instead of looping (and calling the API) forever.\n",
    "for attempt in range(1, MAX_ATTEMPTS + 1):\n",
    "    result = chain.invoke(\n",
    "        {\n",
    "            \"context\": retrieved_docs,\n",
    "            \"question\": QUESTION,\n",
    "        }\n",
    "    )\n",
    "\n",
    "    groundedness = groundedness_check.invoke(\n",
    "        {\n",
    "            \"context\": retrieved_docs,\n",
    "            \"answer\": result,\n",
    "        }\n",
    "    )\n",
    "    if groundedness == \"grounded\":\n",
    "        break\n",
    "else:\n",
    "    # Reached only when no attempt produced a grounded answer.\n",
    "    print(\n",
    "        f\"No grounded answer after {MAX_ATTEMPTS} attempts \"\n",
    "        f\"(last verdict: {groundedness!r}).\"\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Result\n",
    "The last generated answer (grounded unless the warning above was printed)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}