infra: use nbconvert for docs build (#21135)

todo - [x] remove quarto build semantics - [x] remove quarto download/install - [x] make `uv` not verbose
2 weeks ago · d5bde4fa91
parent ad0f3c14c2
commit d5bde4fa91
14 changed files with 263 additions and 217 deletions
--- a/docs/Makefile
+++ b/docs/Makefile
@ -1,5 +1,5 @@
 # we build the docs in these stages:
-# 1. install quarto and python dependencies
+# 1. install vercel and python dependencies
 # 2. copy files from "source dir" to "intermediate dir"
 # 2. generate files like model feat table, etc in "intermediate dir"
 # 3. copy files to their right spots (e.g. langserve readme) in "intermediate dir"
@ -7,12 +7,11 @@

 SOURCE_DIR = docs/
 INTERMEDIATE_DIR = build/intermediate/docs
-OUTPUT_DIR = build/output
-OUTPUT_DOCS_DIR = $(OUTPUT_DIR)/docs

-PYTHON = .venv/bin/python
+OUTPUT_NEW_DIR = build/output-new
+OUTPUT_NEW_DOCS_DIR = $(OUTPUT_NEW_DIR)/docs

-QUARTO_CMD ?= quarto
+PYTHON = .venv/bin/python

 PARTNER_DEPS_LIST := $(shell ls -1 ../libs/partners | grep -vE "airbyte|ibm" | xargs -I {} echo "../libs/partners/{}" | tr '\n' ' ')

@ -25,9 +24,6 @@ install-vercel-deps:
 	yum -y update
 	yum install gcc bzip2-devel libffi-devel zlib-devel wget tar gzip rsync -y

-	wget -q https://github.com/quarto-dev/quarto-cli/releases/download/v1.3.450/quarto-1.3.450-linux-amd64.tar.gz
-	tar -xzf quarto-1.3.450-linux-amd64.tar.gz
-
 install-py-deps:
 	python3 -m venv .venv
 	$(PYTHON) -m pip install --upgrade pip
@ -55,26 +51,24 @@ generate-files:
 	$(PYTHON) scripts/generate_api_reference_links.py --docs_dir $(INTERMEDIATE_DIR)

 copy-infra:
-	mkdir -p $(OUTPUT_DIR)
-	cp -r src $(OUTPUT_DIR)
-	cp vercel.json $(OUTPUT_DIR)
-	cp babel.config.js $(OUTPUT_DIR)
-	cp -r data $(OUTPUT_DIR)
-	cp docusaurus.config.js $(OUTPUT_DIR)
-	cp package.json $(OUTPUT_DIR)
-	cp sidebars.js $(OUTPUT_DIR)
-	cp -r static $(OUTPUT_DIR)
-	cp yarn.lock $(OUTPUT_DIR)
-
-quarto-render:
-	$(QUARTO_CMD) render $(INTERMEDIATE_DIR) --output-dir $(OUTPUT_DOCS_DIR) --no-execute
-	mv $(OUTPUT_DOCS_DIR)/$(INTERMEDIATE_DIR)/* $(OUTPUT_DOCS_DIR)
-	rm -rf $(OUTPUT_DOCS_DIR)/build
+	mkdir -p $(OUTPUT_NEW_DIR)
+	cp -r src $(OUTPUT_NEW_DIR)
+	cp vercel.json $(OUTPUT_NEW_DIR)
+	cp babel.config.js $(OUTPUT_NEW_DIR)
+	cp -r data $(OUTPUT_NEW_DIR)
+	cp docusaurus.config.js $(OUTPUT_NEW_DIR)
+	cp package.json $(OUTPUT_NEW_DIR)
+	cp sidebars.js $(OUTPUT_NEW_DIR)
+	cp -r static $(OUTPUT_NEW_DIR)
+	cp yarn.lock $(OUTPUT_NEW_DIR)
+
+render:
+	$(PYTHON) scripts/notebook_convert.py $(INTERMEDIATE_DIR) $(OUTPUT_NEW_DOCS_DIR)

 md-sync:
-	rsync -avm --include="*/" --include="*.mdx" --include="*.md" --exclude="*" $(INTERMEDIATE_DIR)/ $(OUTPUT_DOCS_DIR)
+	rsync -avm --include="*/" --include="*.mdx" --include="*.md" --include="*.png" --exclude="*" $(INTERMEDIATE_DIR)/ $(OUTPUT_NEW_DOCS_DIR)

-build: install-py-deps generate-files copy-infra quarto-render md-sync
+build: install-py-deps generate-files copy-infra render md-sync

 start:
-	cd $(OUTPUT_DIR) && yarn && yarn start --port=$(PORT)
+	cd $(OUTPUT_NEW_DIR) && yarn && yarn start --port=$(PORT)
--- a/docs/docs/expression_language/why.ipynb
+++ b/docs/docs/expression_language/why.ipynb
@ -3,16 +3,18 @@
  {
   "cell_type": "raw",
   "id": "bc346658-6820-413a-bd8f-11bd3082fe43",
-   "metadata": {},
+   "metadata": {
+    "vscode": {
+     "languageId": "raw"
+    }
+   },
   "source": [
    "---\n",
    "sidebar_position: 0.5\n",
    "title: Advantages of LCEL\n",
    "---\n",
    "\n",
-    "```{=mdx}\n",
-    "import { ColumnContainer, Column } from \"@theme/Columns\";\n",
-    "```"
+    "import { ColumnContainer, Column } from \"@theme/Columns\";"
   ]
  },
  {
@ -20,6 +22,7 @@
   "id": "919a5ae2-ed21-4923-b98f-723c111bac67",
   "metadata": {},
   "source": [
+    "\n",
    ":::{.callout-tip} \n",
    "We recommend reading the LCEL [Get started](/docs/expression_language/get_started) section first.\n",
    ":::"
@ -56,13 +59,10 @@
    "## Invoke\n",
    "In the simplest case, we just want to pass in a topic string and get back a joke string:\n",
    "\n",
-    "```{=mdx}\n",
    "<ColumnContainer>\n",
    "\n",
    "<Column>\n",
    "\n",
-    "```\n",
-    "\n",
    "#### Without LCEL\n"
   ]
  },
@ -102,11 +102,9 @@
   "metadata": {},
   "source": [
    "\n",
-    "```{=mdx}\n",
    "</Column>\n",
    "\n",
    "<Column>\n",
-    "```\n",
    "\n",
    "#### LCEL\n",
    "\n"
@ -146,18 +144,15 @@
   "metadata": {},
   "source": [
    "\n",
-    "```{=mdx}\n",
    "</Column>\n",
    "</ColumnContainer>\n",
-    "```\n",
+    "\n",
    "## Stream\n",
    "If we want to stream results instead, we'll need to change our function:\n",
    "\n",
-    "```{=mdx}\n",
    "\n",
    "<ColumnContainer>\n",
    "<Column>\n",
-    "```\n",
    "\n",
    "#### Without LCEL\n",
    "\n"
@ -198,11 +193,10 @@
   "id": "f8e36b0e-c7dc-4130-a51b-189d4b756c7f",
   "metadata": {},
   "source": [
-    "```{=mdx}\n",
    "</Column>\n",
    "\n",
    "<Column>\n",
-    "```\n",
+    "\n",
    "#### LCEL\n",
    "\n"
   ]
@ -223,19 +217,18 @@
   "id": "b9b41e78-ddeb-44d0-a58b-a0ea0c99a761",
   "metadata": {},
   "source": [
-    "```{=mdx}\n",
    "</Column>\n",
    "</ColumnContainer>\n",
-    "```\n",
+    "\n",
    "\n",
    "## Batch\n",
    "\n",
    "If we want to run on a batch of inputs in parallel, we'll again need a new function:\n",
    "\n",
-    "```{=mdx}\n",
+    "\n",
    "<ColumnContainer>\n",
    "<Column>\n",
-    "```\n",
+    "\n",
    "\n",
    "#### Without LCEL\n",
    "\n"
@ -263,11 +256,11 @@
   "id": "9b3e9d34-6775-43c1-93d8-684b58e341ab",
   "metadata": {},
   "source": [
-    "```{=mdx}\n",
+    "\n",
    "</Column>\n",
    "\n",
    "<Column>\n",
-    "```\n",
+    "\n",
    "#### LCEL\n",
    "\n"
   ]
@ -287,18 +280,14 @@
   "id": "cc5ba36f-eec1-4fc1-8cfe-fa242a7f7809",
   "metadata": {},
   "source": [
-    "```{=mdx}\n",
    "</Column>\n",
    "</ColumnContainer>\n",
-    "```\n",
    "## Async\n",
    "\n",
    "If we need an asynchronous version:\n",
    "\n",
-    "```{=mdx}\n",
    "<ColumnContainer>\n",
    "<Column>\n",
-    "```\n",
    "\n",
    "#### Without LCEL\n",
    "\n"
@ -334,11 +323,9 @@
   "id": "2f209290-498c-4c17-839e-ee9002919846",
   "metadata": {},
   "source": [
-    "```{=mdx}\n",
    "</Column>\n",
    "\n",
    "<Column>\n",
-    "```\n",
    "\n",
    "#### LCEL\n",
    "\n"
@ -359,10 +346,9 @@
   "id": "1f282129-99a3-40f4-b67f-2d0718b1bea9",
   "metadata": {},
   "source": [
-    "```{=mdx}\n",
    "</Column>\n",
    "</ColumnContainer>\n",
-    "```\n",
+    "\n",
    "## Async Batch\n",
    "\n",
    "```{=mdx}\n",
--- a/docs/docs/integrations/chat/anthropic.ipynb
+++ b/docs/docs/integrations/chat/anthropic.ipynb
--- a/docs/docs/integrations/chat/deepinfra.ipynb
+++ b/docs/docs/integrations/chat/deepinfra.ipynb
@ -8,13 +8,8 @@
   "source": [
    "# DeepInfra\n",
    "\n",
-    "[DeepInfra](https://deepinfra.com/?utm_source=langchain) is a serverless inference as a service that provides access to a [variety of LLMs](https://deepinfra.com/models?utm_source=langchain) and [embeddings models](https://deepinfra.com/models?type=embeddings&utm_source=langchain). This notebook goes over how to use LangChain with DeepInfra for chat models."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
+    "[DeepInfra](https://deepinfra.com/?utm_source=langchain) is a serverless inference as a service that provides access to a [variety of LLMs](https://deepinfra.com/models?utm_source=langchain) and [embeddings models](https://deepinfra.com/models?type=embeddings&utm_source=langchain). This notebook goes over how to use LangChain with DeepInfra for chat models.\n",
+    "\n",
    "## Set the Environment API Key\n",
    "Make sure to get your API key from DeepInfra. You have to [Login](https://deepinfra.com/login?from=%2Fdash) and get a new token.\n",
    "\n",
@ -24,7 +19,8 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
+   "id": "d4a7c55d-b235-4ca4-a579-c90cc9570da9",
   "metadata": {
    "tags": []
   },
@ -32,70 +28,19 @@
   "source": [
    "# get a new token: https://deepinfra.com/login?from=%2Fdash\n",
    "\n",
+    "import os\n",
    "from getpass import getpass\n",
    "\n",
-    "DEEPINFRA_API_TOKEN = getpass()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "import os\n",
+    "from langchain_community.chat_models import ChatDeepInfra\n",
+    "from langchain_core.messages import HumanMessage\n",
+    "\n",
+    "DEEPINFRA_API_TOKEN = getpass()\n",
    "\n",
    "# or pass deepinfra_api_token parameter to the ChatDeepInfra constructor\n",
-    "os.environ[\"DEEPINFRA_API_TOKEN\"] = DEEPINFRA_API_TOKEN"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "d4a7c55d-b235-4ca4-a579-c90cc9570da9",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "from langchain_community.chat_models import ChatDeepInfra\n",
-    "from langchain_core.messages import HumanMessage"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "70cf04e8-423a-4ff6-8b09-f11fb711c817",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "chat = ChatDeepInfra(model=\"meta-llama/Llama-2-7b-chat-hf\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "8199ef8f-eb8b-4253-9ea0-6c24a013ca4c",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "AIMessage(content=\" J'aime la programmation.\", additional_kwargs={}, example=False)"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
+    "os.environ[\"DEEPINFRA_API_TOKEN\"] = DEEPINFRA_API_TOKEN\n",
+    "\n",
+    "chat = ChatDeepInfra(model=\"meta-llama/Llama-2-7b-chat-hf\")\n",
+    "\n",
    "messages = [\n",
    "    HumanMessage(\n",
    "        content=\"Translate this sentence from English to French. I love programming.\"\n",
@ -115,7 +60,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
   "id": "93a21c5c-6ef9-4688-be60-b2e1f94842fb",
   "metadata": {
    "tags": []
@ -127,53 +72,24 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
   "id": "c5fac0e9-05a4-4fc1-a3b3-e5bbb24b971b",
   "metadata": {
    "tags": []
   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "LLMResult(generations=[[ChatGeneration(text=\" J'aime programmer.\", generation_info=None, message=AIMessage(content=\" J'aime programmer.\", additional_kwargs={}, example=False))]], llm_output={}, run=[RunInfo(run_id=UUID('8cc8fb68-1c35-439c-96a0-695036a93652'))])"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
   "source": [
    "await chat.agenerate([messages])"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
   "id": "025be980-e50d-4a68-93dc-c9c7b500ce34",
   "metadata": {
    "tags": []
   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      " J'aime la programmation."
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "AIMessage(content=\" J'aime la programmation.\", additional_kwargs={}, example=False)"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
   "source": [
    "chat = ChatDeepInfra(\n",
    "    streaming=True,\n",
--- a/docs/docs/integrations/chat/kinetica.ipynb
+++ b/docs/docs/integrations/chat/kinetica.ipynb
@ -123,7 +123,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
@ -172,7 +172,7 @@
       "      <td>F</td>\n",
       "      <td>59836 Carla Causeway Suite 939\\nPort Eugene, I...</td>\n",
       "      <td>meltondenise@yahoo.com</td>\n",
-       "      <td>1997-09-09</td>\n",
+       "      <td>1997-11-23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
@ -181,7 +181,7 @@
       "      <td>M</td>\n",
       "      <td>3108 Christina Forges\\nPort Timothychester, KY...</td>\n",
       "      <td>erica80@hotmail.com</td>\n",
-       "      <td>1924-05-05</td>\n",
+       "      <td>1924-07-19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
@ -190,7 +190,7 @@
       "      <td>F</td>\n",
       "      <td>Unit 7405 Box 3052\\nDPO AE 09858</td>\n",
       "      <td>timothypotts@gmail.com</td>\n",
-       "      <td>1933-09-06</td>\n",
+       "      <td>1933-11-20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
@ -199,7 +199,7 @@
       "      <td>F</td>\n",
       "      <td>6408 Christopher Hill Apt. 459\\nNew Benjamin, ...</td>\n",
       "      <td>dadams@gmail.com</td>\n",
-       "      <td>1988-07-28</td>\n",
+       "      <td>1988-10-11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
@ -208,7 +208,7 @@
       "      <td>M</td>\n",
       "      <td>2241 Bell Gardens Suite 723\\nScottside, CA 38463</td>\n",
       "      <td>williamayala@gmail.com</td>\n",
-       "      <td>1930-12-19</td>\n",
+       "      <td>1931-03-04</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
@ -233,14 +233,14 @@
       "\n",
       "    birthdate  \n",
       "id             \n",
-       "0  1997-09-09  \n",
-       "1  1924-05-05  \n",
-       "2  1933-09-06  \n",
-       "3  1988-07-28  \n",
-       "4  1930-12-19  "
+       "0  1997-11-23  \n",
+       "1  1924-07-19  \n",
+       "2  1933-11-20  \n",
+       "3  1988-10-11  \n",
+       "4  1931-03-04  "
      ]
     },
-     "execution_count": 2,
+     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -646,7 +646,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.8.18"
+   "version": "3.11.4"
  }
 },
 "nbformat": 4,
--- a/docs/docs/integrations/llms/konko.ipynb
+++ b/docs/docs/integrations/llms/konko.ipynb
@ -2,7 +2,12 @@
 "cells": [
  {
   "cell_type": "raw",
-   "metadata": {},
+   "id": "b5f24c75",
+   "metadata": {
+    "vscode": {
+     "languageId": "raw"
+    }
+   },
   "source": [
    "---\n",
    "sidebar_label: Konko\n",
@ -21,23 +26,12 @@
    "1. **Select** the right open source or proprietary LLMs for their application\n",
    "2. **Build** applications faster with integrations to leading application frameworks and fully managed APIs\n",
    "3. **Fine tune** smaller open-source LLMs to achieve industry-leading performance at a fraction of the cost\n",
-    "4. **Deploy production-scale APIs** that meet security, privacy, throughput, and latency SLAs without infrastructure set-up or administration using Konko AI's SOC 2 compliant, multi-cloud infrastructure\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "0d896d07-82b4-4f38-8c37-f0bc8b0e4fe1",
-   "metadata": {},
-   "source": [
+    "4. **Deploy production-scale APIs** that meet security, privacy, throughput, and latency SLAs without infrastructure set-up or administration using Konko AI's SOC 2 compliant, multi-cloud infrastructure\n",
+    "\n",
    "This example goes over how to use LangChain to interact with `Konko` completion [models](https://docs.konko.ai/docs/list-of-models#konko-hosted-models-for-completion)\n",
    "\n",
-    "To run this notebook, you'll need Konko API key. Sign in to our web app to [create an API key](https://platform.konko.ai/settings/api-keys) to access models"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
+    "To run this notebook, you'll need Konko API key. Sign in to our web app to [create an API key](https://platform.konko.ai/settings/api-keys) to access models\n",
+    "\n",
    "#### Set Environment Variables\n",
    "\n",
    "1. You can set environment variables for \n",
@ -48,13 +42,8 @@
    "```shell\n",
    "export KONKO_API_KEY={your_KONKO_API_KEY_here}\n",
    "export OPENAI_API_KEY={your_OPENAI_API_KEY_here} #Optional\n",
-    "```"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
+    "```\n",
+    "\n",
    "## Calling a model\n",
    "\n",
    "Find a model on the [Konko overview page](https://docs.konko.ai/docs/list-of-models)\n",
@ -92,14 +81,6 @@
    "input_ = \"\"\"You are a helpful assistant. Explain Big Bang Theory briefly.\"\"\"\n",
    "print(llm.invoke(input_))"
   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "78148bf7-2211-40b4-93a7-e90139ab1169",
-   "metadata": {},
-   "outputs": [],
-   "source": []
  }
 ],
 "metadata": {
@ -118,7 +99,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.11.3"
+   "version": "3.11.4"
  }
 },
 "nbformat": 4,
--- a/docs/docs/integrations/providers/dspy.ipynb
+++ b/docs/docs/integrations/providers/dspy.ipynb
@ -13,7 +13,7 @@
    "\n",
    "This short tutorial demonstrates how this proof-of-concept feature works. *This will not give you the full power of DSPy or LangChain yet, but we will expand it if there's high demand.*\n",
    "\n",
-    "Note: this was slightly modified from the original example Omar wrote for DSPy. If you are interested in LangChain <> DSPy but coming from the DSPy side, I'd recommend checking that out. You can find that [here](https://github.com/stanfordnlp/dspy/blob/main/examples/tweets/compiling_langchain.ipynb).\n",
+    "Note: this was slightly modified from the original example Omar wrote for DSPy. If you are interested in LangChain \\<\\> DSPy but coming from the DSPy side, I'd recommend checking that out. You can find that [here](https://github.com/stanfordnlp/dspy/blob/main/examples/tweets/compiling_langchain.ipynb).\n",
    "\n",
    "Let's take a look at an example. In this example we will make a simple RAG pipeline. We will use DSPy to \"compile\" our program and learn an optimized prompt.\n",
    "\n",
@ -218,7 +218,7 @@
   "id": "13c293d6-0806-42f5-a4aa-5b50d4cf38d2",
   "metadata": {},
   "source": [
-    "## LCEL <> DSPy\n",
+    "## LCEL \\<\\> DSPy\n",
    "\n",
    "In order to use LangChain with DSPy, you need to make two minor modifications\n",
    "\n",
--- a/docs/docs/use_cases/sql/agents.ipynb
+++ b/docs/docs/use_cases/sql/agents.ipynb
@ -352,7 +352,7 @@
    "\n",
    "To optimize agent performance, we can provide a custom prompt with domain-specific knowledge. In this case we'll create a few shot prompt with an example selector, that will dynamically build the few shot prompt based on the user input. This will help the model make better queries by inserting relevant queries in the prompt that the model can use as reference.\n",
    "\n",
-    "First we need some user input <> SQL query examples:"
+    "First we need some user input \\<\\> SQL query examples:"
   ]
  },
  {
--- a/docs/scripts/generate_api_reference_links.py
+++ b/docs/scripts/generate_api_reference_links.py
@ -185,8 +185,8 @@ def replace_imports(file):
    # Use re.sub to replace each Python code block
    data = code_block_re.sub(replacer, data)

-    if all_imports:
-        print(f"Adding {len(all_imports)} links for imports in {file}")  # noqa: T201
+    # if all_imports:
+    #     print(f"Adding {len(all_imports)} links for imports in {file}")  # noqa: T201
    with open(file, "w") as f:
        f.write(data)
    return all_imports
--- a/docs/scripts/notebook_convert.py
+++ b/docs/scripts/notebook_convert.py
@ -0,0 +1,130 @@
+import multiprocessing
+import os
+import re
+import sys
+from pathlib import Path
+from typing import Iterable, Tuple
+
+import nbformat
+from nbconvert.exporters import MarkdownExporter
+from nbconvert.preprocessors import Preprocessor, RegexRemovePreprocessor
+
+
+class EscapePreprocessor(Preprocessor):
+    def preprocess_cell(self, cell, resources, cell_index):
+        if cell.cell_type == "markdown":
+            # find all occurrences of ```{=mdx} blocks and remove wrapper
+            if "```{=mdx}\n" in cell.source:
+                cell.source = re.sub(
+                    r"```{=mdx}\n(.*?)\n```", r"\1", cell.source, flags=re.DOTALL
+                )
+            if ":::{.callout" in cell.source:
+                cell.source = re.sub(
+                    r":::{.callout-([^}]*)}(.*?):::",
+                    r":::\1\2:::",
+                    cell.source,
+                    flags=re.DOTALL,
+                )
+        return cell, resources
+
+
+class ExtractAttachmentsPreprocessor(Preprocessor):
+    """
+    Extracts all of the outputs from the notebook file.  The extracted
+    outputs are returned in the 'resources' dictionary.
+    """
+
+    def preprocess_cell(self, cell, resources, cell_index):
+        """
+        Apply a transformation on each cell,
+        Parameters
+        ----------
+        cell : NotebookNode cell
+            Notebook cell being processed
+        resources : dictionary
+            Additional resources used in the conversion process.  Allows
+            preprocessors to pass variables into the Jinja engine.
+        cell_index : int
+            Index of the cell being processed (see base.py)
+        """
+
+        # Get files directory if it has been specified
+
+        # Make sure outputs key exists
+        if not isinstance(resources["outputs"], dict):
+            resources["outputs"] = {}
+
+        # Loop through all of the attachments in the cell
+        for name, attach in cell.get("attachments", {}).items():
+            for mime, data in attach.items():
+                if mime not in {
+                    "image/png",
+                    "image/jpeg",
+                    "image/svg+xml",
+                    "application/pdf",
+                }:
+                    continue
+
+                # attachments are pre-rendered. Only replace markdown-formatted
+                # images with the following logic
+                attach_str = f"({name})"
+                if attach_str in cell.source:
+                    data = f"(data:{mime};base64,{data})"
+                    cell.source = cell.source.replace(attach_str, data)
+
+        return cell, resources
+
+
+# Module-level exporter used by `_convert_notebook` for every notebook.
+# Preprocessors are listed in the order they are passed to the exporter:
+# markup rewriting, attachment inlining, then regex-based cell removal
+# (the `^\s*$` pattern presumably drops empty / whitespace-only cells).
+# The "mdoutput" template is looked up under a path that is relative
+# ("./scripts/..."), so this presumably assumes the script is run from the
+# docs directory - TODO confirm.
+exporter = MarkdownExporter(
+    preprocessors=[
+        EscapePreprocessor,
+        ExtractAttachmentsPreprocessor,
+        RegexRemovePreprocessor(patterns=[r"^\s*$"]),
+    ],
+    template_name="mdoutput",
+    extra_template_basedirs=["./scripts/notebook_convert_templates"],
+)
+
+
+def _process_path(tup: Tuple[Path, Path, Path]):
+    notebook_path, intermediate_docs_dir, output_docs_dir = tup
+    relative = notebook_path.relative_to(intermediate_docs_dir)
+    output_path = output_docs_dir / relative.parent / (relative.stem + ".md")
+    _convert_notebook(notebook_path, output_path)
+
+
+def _convert_notebook(notebook_path: Path, output_path: Path):
+    with open(notebook_path) as f:
+        nb = nbformat.read(f, as_version=4)
+
+    body, resources = exporter.from_notebook_node(nb)
+
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    with open(output_path, "w") as f:
+        f.write(body)
+
+    return output_path
+
+
+if __name__ == "__main__":
+    intermediate_docs_dir = Path(sys.argv[1])
+    output_docs_dir = Path(sys.argv[2])
+
+    source_paths_arg = os.environ.get("SOURCE_PATHS")
+    source_paths: Iterable[Path]
+    if source_paths_arg:
+        source_path_strs = re.split(r"\s+", source_paths_arg)
+        source_paths_stripped = [p.strip() for p in source_path_strs]
+        source_paths = [intermediate_docs_dir / p for p in source_paths_stripped if p]
+    else:
+        source_paths = intermediate_docs_dir.glob("**/*.ipynb")
+
+    with multiprocessing.Pool() as pool:
+        pool.map(
+            _process_path,
+            (
+                (notebook_path, intermediate_docs_dir, output_docs_dir)
+                for notebook_path in source_paths
+            ),
+        )
--- a/docs/scripts/notebook_convert_templates/mdoutput/conf.json
+++ b/docs/scripts/notebook_convert_templates/mdoutput/conf.json
@ -0,0 +1,5 @@
+{
+  "mimetypes": {
+    "text/markdown": true
+  }
+}
--- a/docs/scripts/notebook_convert_templates/mdoutput/index.md.j2
+++ b/docs/scripts/notebook_convert_templates/mdoutput/index.md.j2
@ -0,0 +1,33 @@
+{% extends 'markdown/index.md.j2' %}
+
+{#- Overrides of the base markdown template's output blocks:
+    traceback lines, stream output and text/plain data are wrapped in
+    "output" code fences, text/html data in an "html" fence, and JPEG / PNG
+    data is embedded as a base64 data URI image. -#}
+
+{%- block traceback_line -%}
+```output
+{{ line.rstrip() | strip_ansi }}
+```
+{%- endblock traceback_line -%}
+
+{%- block stream -%}
+```output
+{{ output.text.rstrip() }}
+```
+{%- endblock stream -%}
+
+{%- block data_text scoped -%}
+```output
+{{ output.data['text/plain'].rstrip() }}
+```
+{%- endblock data_text -%}
+
+{%- block data_html scoped -%}
+```html
+{{ output.data['text/html'] | safe }}
+```
+{%- endblock data_html -%}
+
+{#- "image/jpeg" is the registered media type and matches the key the data is read from. -#}
+{%- block data_jpg scoped -%}
+![](data:image/jpeg;base64,{{ output.data['image/jpeg'] }})
+{%- endblock data_jpg -%}
+
+{%- block data_png scoped -%}
+![](data:image/png;base64,{{ output.data['image/png'] }})
+{%- endblock data_png -%}
--- a/docs/vercel_build.sh
+++ b/docs/vercel_build.sh
@ -4,7 +4,7 @@ set -e

 make install-vercel-deps

-QUARTO_CMD="./quarto-1.3.450/bin/quarto" make build
+make build

 rm -rf docs
-mv build/output/docs ./
+mv build/output-new/docs ./
--- a/docs/vercel_requirements.txt
+++ b/docs/vercel_requirements.txt
@ -9,3 +9,4 @@ langchain-nvidia-ai-endpoints
 langchain-elasticsearch
 langchain-postgres
 urllib3==1.26.18
+nbconvert==7.16.4