diff --git a/docs/Makefile b/docs/Makefile index 72b02adab4..6c0c888dde 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -1,5 +1,5 @@ # we build the docs in these stages: -# 1. install quarto and python dependencies +# 1. install vercel and python dependencies # 2. copy files from "source dir" to "intermediate dir" # 2. generate files like model feat table, etc in "intermediate dir" # 3. copy files to their right spots (e.g. langserve readme) in "intermediate dir" @@ -7,12 +7,11 @@ SOURCE_DIR = docs/ INTERMEDIATE_DIR = build/intermediate/docs -OUTPUT_DIR = build/output -OUTPUT_DOCS_DIR = $(OUTPUT_DIR)/docs -PYTHON = .venv/bin/python +OUTPUT_NEW_DIR = build/output-new +OUTPUT_NEW_DOCS_DIR = $(OUTPUT_NEW_DIR)/docs -QUARTO_CMD ?= quarto +PYTHON = .venv/bin/python PARTNER_DEPS_LIST := $(shell ls -1 ../libs/partners | grep -vE "airbyte|ibm" | xargs -I {} echo "../libs/partners/{}" | tr '\n' ' ') @@ -25,9 +24,6 @@ install-vercel-deps: yum -y update yum install gcc bzip2-devel libffi-devel zlib-devel wget tar gzip rsync -y - wget -q https://github.com/quarto-dev/quarto-cli/releases/download/v1.3.450/quarto-1.3.450-linux-amd64.tar.gz - tar -xzf quarto-1.3.450-linux-amd64.tar.gz - install-py-deps: python3 -m venv .venv $(PYTHON) -m pip install --upgrade pip @@ -55,26 +51,24 @@ generate-files: $(PYTHON) scripts/generate_api_reference_links.py --docs_dir $(INTERMEDIATE_DIR) copy-infra: - mkdir -p $(OUTPUT_DIR) - cp -r src $(OUTPUT_DIR) - cp vercel.json $(OUTPUT_DIR) - cp babel.config.js $(OUTPUT_DIR) - cp -r data $(OUTPUT_DIR) - cp docusaurus.config.js $(OUTPUT_DIR) - cp package.json $(OUTPUT_DIR) - cp sidebars.js $(OUTPUT_DIR) - cp -r static $(OUTPUT_DIR) - cp yarn.lock $(OUTPUT_DIR) - -quarto-render: - $(QUARTO_CMD) render $(INTERMEDIATE_DIR) --output-dir $(OUTPUT_DOCS_DIR) --no-execute - mv $(OUTPUT_DOCS_DIR)/$(INTERMEDIATE_DIR)/* $(OUTPUT_DOCS_DIR) - rm -rf $(OUTPUT_DOCS_DIR)/build + mkdir -p $(OUTPUT_NEW_DIR) + cp -r src $(OUTPUT_NEW_DIR) + cp vercel.json $(OUTPUT_NEW_DIR) + cp babel.config.js $(OUTPUT_NEW_DIR) + cp -r data $(OUTPUT_NEW_DIR) + cp docusaurus.config.js $(OUTPUT_NEW_DIR) + cp package.json $(OUTPUT_NEW_DIR) + cp sidebars.js $(OUTPUT_NEW_DIR) + cp -r static $(OUTPUT_NEW_DIR) + cp yarn.lock $(OUTPUT_NEW_DIR) + +render: + $(PYTHON) scripts/notebook_convert.py $(INTERMEDIATE_DIR) $(OUTPUT_NEW_DOCS_DIR) md-sync: - rsync -avm --include="*/" --include="*.mdx" --include="*.md" --exclude="*" $(INTERMEDIATE_DIR)/ $(OUTPUT_DOCS_DIR) + rsync -avm --include="*/" --include="*.mdx" --include="*.md" --include="*.png" --exclude="*" $(INTERMEDIATE_DIR)/ $(OUTPUT_NEW_DOCS_DIR) -build: install-py-deps generate-files copy-infra quarto-render md-sync +build: install-py-deps generate-files copy-infra render md-sync start: - cd $(OUTPUT_DIR) && yarn && yarn start --port=$(PORT) + cd $(OUTPUT_NEW_DIR) && yarn && yarn start --port=$(PORT) diff --git a/docs/docs/expression_language/why.ipynb b/docs/docs/expression_language/why.ipynb index 018d6b0537..5a14e9c420 100644 --- a/docs/docs/expression_language/why.ipynb +++ b/docs/docs/expression_language/why.ipynb @@ -3,16 +3,18 @@ { "cell_type": "raw", "id": "bc346658-6820-413a-bd8f-11bd3082fe43", - "metadata": {}, + "metadata": { + "vscode": { + "languageId": "raw" + } + }, "source": [ "---\n", "sidebar_position: 0.5\n", "title: Advantages of LCEL\n", "---\n", "\n", - "```{=mdx}\n", - "import { ColumnContainer, Column } from \"@theme/Columns\";\n", - "```" + "import { ColumnContainer, Column } from \"@theme/Columns\";" ] }, { @@ -20,6 +22,7 @@ "id": 
"919a5ae2-ed21-4923-b98f-723c111bac67", "metadata": {}, "source": [ + "\n", ":::{.callout-tip} \n", "We recommend reading the LCEL [Get started](/docs/expression_language/get_started) section first.\n", ":::" @@ -56,13 +59,10 @@ "## Invoke\n", "In the simplest case, we just want to pass in a topic string and get back a joke string:\n", "\n", - "```{=mdx}\n", "\n", "\n", "\n", "\n", - "```\n", - "\n", "#### Without LCEL\n" ] }, @@ -102,11 +102,9 @@ "metadata": {}, "source": [ "\n", - "```{=mdx}\n", "\n", "\n", "\n", - "```\n", "\n", "#### LCEL\n", "\n" @@ -146,18 +144,15 @@ "metadata": {}, "source": [ "\n", - "```{=mdx}\n", "\n", "\n", - "```\n", + "\n", "## Stream\n", "If we want to stream results instead, we'll need to change our function:\n", "\n", - "```{=mdx}\n", "\n", "\n", "\n", - "```\n", "\n", "#### Without LCEL\n", "\n" @@ -198,11 +193,10 @@ "id": "f8e36b0e-c7dc-4130-a51b-189d4b756c7f", "metadata": {}, "source": [ - "```{=mdx}\n", "\n", "\n", "\n", - "```\n", + "\n", "#### LCEL\n", "\n" ] @@ -223,19 +217,18 @@ "id": "b9b41e78-ddeb-44d0-a58b-a0ea0c99a761", "metadata": {}, "source": [ - "```{=mdx}\n", "\n", "\n", - "```\n", + "\n", "\n", "## Batch\n", "\n", "If we want to run on a batch of inputs in parallel, we'll again need a new function:\n", "\n", - "```{=mdx}\n", + "\n", "\n", "\n", - "```\n", + "\n", "\n", "#### Without LCEL\n", "\n" @@ -263,11 +256,11 @@ "id": "9b3e9d34-6775-43c1-93d8-684b58e341ab", "metadata": {}, "source": [ - "```{=mdx}\n", + "\n", "\n", "\n", "\n", - "```\n", + "\n", "#### LCEL\n", "\n" ] @@ -287,18 +280,14 @@ "id": "cc5ba36f-eec1-4fc1-8cfe-fa242a7f7809", "metadata": {}, "source": [ - "```{=mdx}\n", "\n", "\n", - "```\n", "## Async\n", "\n", "If we need an asynchronous version:\n", "\n", - "```{=mdx}\n", "\n", "\n", - "```\n", "\n", "#### Without LCEL\n", "\n" @@ -334,11 +323,9 @@ "id": "2f209290-498c-4c17-839e-ee9002919846", "metadata": {}, "source": [ - "```{=mdx}\n", "\n", "\n", "\n", - "```\n", "\n", "#### LCEL\n", "\n" @@ -359,10 +346,9 @@ "id": "1f282129-99a3-40f4-b67f-2d0718b1bea9", "metadata": {}, "source": [ - "```{=mdx}\n", "\n", "\n", - "```\n", + "\n", "## Async Batch\n", "\n", "```{=mdx}\n", diff --git a/docs/docs/integrations/chat/anthropic.ipynb b/docs/docs/integrations/chat/anthropic.ipynb index ea72b7faac..2308c1063f 100644 --- a/docs/docs/integrations/chat/anthropic.ipynb +++ b/docs/docs/integrations/chat/anthropic.ipynb @@ -226,7 +226,7 @@ "\n", "With Anthropic's [tool-calling, or tool-use, API](https://docs.anthropic.com/claude/docs/functions-external-tools), you can define tools for the model to invoke. 
This is extremely useful for building tool-using chains and agents, as well as for getting structured outputs from a model.\n", "\n", - "::: {.callout-note}\n", + ":::note\n", "\n", "Anthropic's tool-calling functionality is still in beta.\n", "\n", @@ -583,20 +583,20 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "3e9d1ab5", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "" + "" ], "text/plain": [ "" ] }, - "execution_count": 2, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -612,7 +612,7 @@ "img_base64 = base64.b64encode(img_path.read_bytes()).decode(\"utf-8\")\n", "\n", "# display b64 image in notebook\n", - "HTML(f'')" + "HTML(f'')" ] }, { @@ -656,9 +656,9 @@ ], "metadata": { "kernelspec": { - "display_name": "poetry-venv-2", + "display_name": ".venv", "language": "python", - "name": "poetry-venv-2" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -670,7 +670,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/docs/docs/integrations/chat/deepinfra.ipynb b/docs/docs/integrations/chat/deepinfra.ipynb index 4c556ffec0..f2810bdb7a 100644 --- a/docs/docs/integrations/chat/deepinfra.ipynb +++ b/docs/docs/integrations/chat/deepinfra.ipynb @@ -8,13 +8,8 @@ "source": [ "# DeepInfra\n", "\n", - "[DeepInfra](https://deepinfra.com/?utm_source=langchain) is a serverless inference as a service that provides access to a [variety of LLMs](https://deepinfra.com/models?utm_source=langchain) and [embeddings models](https://deepinfra.com/models?type=embeddings&utm_source=langchain). This notebook goes over how to use LangChain with DeepInfra for chat models." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ + "[DeepInfra](https://deepinfra.com/?utm_source=langchain) is a serverless inference as a service that provides access to a [variety of LLMs](https://deepinfra.com/models?utm_source=langchain) and [embeddings models](https://deepinfra.com/models?type=embeddings&utm_source=langchain). This notebook goes over how to use LangChain with DeepInfra for chat models.\n", + "\n", "## Set the Environment API Key\n", "Make sure to get your API key from DeepInfra. 
You have to [Login](https://deepinfra.com/login?from=%2Fdash) and get a new token.\n", "\n", @@ -24,7 +19,8 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, + "id": "d4a7c55d-b235-4ca4-a579-c90cc9570da9", "metadata": { "tags": [] }, @@ -32,70 +28,19 @@ "source": [ "# get a new token: https://deepinfra.com/login?from=%2Fdash\n", "\n", + "import os\n", "from getpass import getpass\n", "\n", - "DEEPINFRA_API_TOKEN = getpass()" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "import os\n", + "from langchain_community.chat_models import ChatDeepInfra\n", + "from langchain_core.messages import HumanMessage\n", + "\n", + "DEEPINFRA_API_TOKEN = getpass()\n", "\n", "# or pass deepinfra_api_token parameter to the ChatDeepInfra constructor\n", - "os.environ[\"DEEPINFRA_API_TOKEN\"] = DEEPINFRA_API_TOKEN" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "d4a7c55d-b235-4ca4-a579-c90cc9570da9", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from langchain_community.chat_models import ChatDeepInfra\n", - "from langchain_core.messages import HumanMessage" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "70cf04e8-423a-4ff6-8b09-f11fb711c817", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "chat = ChatDeepInfra(model=\"meta-llama/Llama-2-7b-chat-hf\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "8199ef8f-eb8b-4253-9ea0-6c24a013ca4c", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "AIMessage(content=\" J'aime la programmation.\", additional_kwargs={}, example=False)" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ + "os.environ[\"DEEPINFRA_API_TOKEN\"] = DEEPINFRA_API_TOKEN\n", + "\n", + "chat = ChatDeepInfra(model=\"meta-llama/Llama-2-7b-chat-hf\")\n", + "\n", "messages = [\n", " HumanMessage(\n", " content=\"Translate this sentence from English to French. I love programming.\"\n", @@ -115,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "93a21c5c-6ef9-4688-be60-b2e1f94842fb", "metadata": { "tags": [] @@ -127,53 +72,24 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "c5fac0e9-05a4-4fc1-a3b3-e5bbb24b971b", "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "LLMResult(generations=[[ChatGeneration(text=\" J'aime programmer.\", generation_info=None, message=AIMessage(content=\" J'aime programmer.\", additional_kwargs={}, example=False))]], llm_output={}, run=[RunInfo(run_id=UUID('8cc8fb68-1c35-439c-96a0-695036a93652'))])" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "await chat.agenerate([messages])" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "025be980-e50d-4a68-93dc-c9c7b500ce34", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " J'aime la programmation." 
- ] - }, - { - "data": { - "text/plain": [ - "AIMessage(content=\" J'aime la programmation.\", additional_kwargs={}, example=False)" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "chat = ChatDeepInfra(\n", " streaming=True,\n", diff --git a/docs/docs/integrations/chat/kinetica.ipynb b/docs/docs/integrations/chat/kinetica.ipynb index cacff2ee2e..4482efcb55 100644 --- a/docs/docs/integrations/chat/kinetica.ipynb +++ b/docs/docs/integrations/chat/kinetica.ipynb @@ -123,7 +123,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -172,7 +172,7 @@ " F\n", " 59836 Carla Causeway Suite 939\\nPort Eugene, I...\n", " meltondenise@yahoo.com\n", - " 1997-09-09\n", + " 1997-11-23\n", " \n", " \n", " 1\n", @@ -181,7 +181,7 @@ " M\n", " 3108 Christina Forges\\nPort Timothychester, KY...\n", " erica80@hotmail.com\n", - " 1924-05-05\n", + " 1924-07-19\n", " \n", " \n", " 2\n", @@ -190,7 +190,7 @@ " F\n", " Unit 7405 Box 3052\\nDPO AE 09858\n", " timothypotts@gmail.com\n", - " 1933-09-06\n", + " 1933-11-20\n", " \n", " \n", " 3\n", @@ -199,7 +199,7 @@ " F\n", " 6408 Christopher Hill Apt. 459\\nNew Benjamin, ...\n", " dadams@gmail.com\n", - " 1988-07-28\n", + " 1988-10-11\n", " \n", " \n", " 4\n", @@ -208,7 +208,7 @@ " M\n", " 2241 Bell Gardens Suite 723\\nScottside, CA 38463\n", " williamayala@gmail.com\n", - " 1930-12-19\n", + " 1931-03-04\n", " \n", " \n", "\n", @@ -233,14 +233,14 @@ "\n", " birthdate \n", "id \n", - "0 1997-09-09 \n", - "1 1924-05-05 \n", - "2 1933-09-06 \n", - "3 1988-07-28 \n", - "4 1930-12-19 " + "0 1997-11-23 \n", + "1 1924-07-19 \n", + "2 1933-11-20 \n", + "3 1988-10-11 \n", + "4 1931-03-04 " ] }, - "execution_count": 2, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -646,7 +646,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.18" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/docs/docs/integrations/llms/konko.ipynb b/docs/docs/integrations/llms/konko.ipynb index 8592a09d83..67bccb81fe 100644 --- a/docs/docs/integrations/llms/konko.ipynb +++ b/docs/docs/integrations/llms/konko.ipynb @@ -2,7 +2,12 @@ "cells": [ { "cell_type": "raw", - "metadata": {}, + "id": "b5f24c75", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, "source": [ "---\n", "sidebar_label: Konko\n", @@ -21,23 +26,12 @@ "1. **Select** the right open source or proprietary LLMs for their application\n", "2. **Build** applications faster with integrations to leading application frameworks and fully managed APIs\n", "3. **Fine tune** smaller open-source LLMs to achieve industry-leading performance at a fraction of the cost\n", - "4. **Deploy production-scale APIs** that meet security, privacy, throughput, and latency SLAs without infrastructure set-up or administration using Konko AI's SOC 2 compliant, multi-cloud infrastructure\n" - ] - }, - { - "cell_type": "markdown", - "id": "0d896d07-82b4-4f38-8c37-f0bc8b0e4fe1", - "metadata": {}, - "source": [ + "4. 
**Deploy production-scale APIs** that meet security, privacy, throughput, and latency SLAs without infrastructure set-up or administration using Konko AI's SOC 2 compliant, multi-cloud infrastructure\n", + "\n", + "This example goes over how to use LangChain to interact with `Konko` completion [models](https://docs.konko.ai/docs/list-of-models#konko-hosted-models-for-completion).\n", + "\n", + "To run this notebook, you'll need a Konko API key. Sign in to our web app to [create an API key](https://platform.konko.ai/settings/api-keys) to access models.\n", + "\n", + "#### Set Environment Variables\n", "\n", "1. You can set environment variables for \n", @@ -48,13 +42,8 @@ "```shell\n", "export KONKO_API_KEY={your_KONKO_API_KEY_here}\n", "export OPENAI_API_KEY={your_OPENAI_API_KEY_here} #Optional\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ + "```\n", + "\n", "## Calling a model\n", "\n", "Find a model on the [Konko overview page](https://docs.konko.ai/docs/list-of-models)\n", @@ -92,14 +81,6 @@ "input_ = \"\"\"You are a helpful assistant. Explain Big Bang Theory briefly.\"\"\"\n", "print(llm.invoke(input_))" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "78148bf7-2211-40b4-93a7-e90139ab1169", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -118,7 +99,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/docs/docs/integrations/providers/dspy.ipynb b/docs/docs/integrations/providers/dspy.ipynb index 85d616bba8..6929d9477e 100644 --- a/docs/docs/integrations/providers/dspy.ipynb +++ b/docs/docs/integrations/providers/dspy.ipynb @@ -13,7 +13,7 @@ "\n", "This short tutorial demonstrates how this proof-of-concept feature works. *This will not give you the full power of DSPy or LangChain yet, but we will expand it if there's high demand.*\n", "\n", - "Note: this was slightly modified from the original example Omar wrote for DSPy. If you are interested in LangChain <> DSPy but coming from the DSPy side, I'd recommend checking that out. You can find that [here](https://github.com/stanfordnlp/dspy/blob/main/examples/tweets/compiling_langchain.ipynb).\n", + "Note: this was slightly modified from the original example Omar wrote for DSPy. If you are interested in LangChain \<\> DSPy but coming from the DSPy side, I'd recommend checking that out. You can find that [here](https://github.com/stanfordnlp/dspy/blob/main/examples/tweets/compiling_langchain.ipynb).\n", "\n", "Let's take a look at an example. In this example we will make a simple RAG pipeline. 
We will use DSPy to \"compile\" our program and learn an optimized prompt.\n", "\n", @@ -218,7 +218,7 @@ "id": "13c293d6-0806-42f5-a4aa-5b50d4cf38d2", "metadata": {}, "source": [ - "## LCEL <> DSPy\n", + "## LCEL \<\> DSPy\n", "\n", "In order to use LangChain with DSPy, you need to make two minor modifications\n", "\n", diff --git a/docs/docs/use_cases/sql/agents.ipynb b/docs/docs/use_cases/sql/agents.ipynb index 9a91a34a8c..065237ea1e 100644 --- a/docs/docs/use_cases/sql/agents.ipynb +++ b/docs/docs/use_cases/sql/agents.ipynb @@ -352,7 +352,7 @@ "\n", "To optimize agent performance, we can provide a custom prompt with domain-specific knowledge. In this case we'll create a few shot prompt with an example selector, that will dynamically build the few shot prompt based on the user input. This will help the model make better queries by inserting relevant queries in the prompt that the model can use as reference.\n", "\n", - "First we need some user input <> SQL query examples:" + "First we need some user input \<\> SQL query examples:" ] }, { diff --git a/docs/scripts/generate_api_reference_links.py b/docs/scripts/generate_api_reference_links.py index 9838303faa..31f4b22fa4 100644 --- a/docs/scripts/generate_api_reference_links.py +++ b/docs/scripts/generate_api_reference_links.py @@ -185,8 +185,8 @@ def replace_imports(file): # Use re.sub to replace each Python code block data = code_block_re.sub(replacer, data) - if all_imports: - print(f"Adding {len(all_imports)} links for imports in {file}") # noqa: T201 + # if all_imports: + # print(f"Adding {len(all_imports)} links for imports in {file}") # noqa: T201 with open(file, "w") as f: f.write(data) return all_imports diff --git a/docs/scripts/notebook_convert.py b/docs/scripts/notebook_convert.py new file mode 100644 index 0000000000..03f98bd7a6 --- /dev/null +++ b/docs/scripts/notebook_convert.py @@ -0,0 +1,130 @@ +import multiprocessing +import os +import re +import sys +from pathlib import Path +from typing import Iterable, Tuple + +import nbformat +from nbconvert.exporters import MarkdownExporter +from nbconvert.preprocessors import Preprocessor, RegexRemovePreprocessor + + +class EscapePreprocessor(Preprocessor): + def preprocess_cell(self, cell, resources, cell_index): + if cell.cell_type == "markdown": + # find all occurrences of ```{=mdx} blocks and remove wrapper + if "```{=mdx}\n" in cell.source: + cell.source = re.sub( + r"```{=mdx}\n(.*?)\n```", r"\1", cell.source, flags=re.DOTALL + ) + if ":::{.callout" in cell.source: + cell.source = re.sub( + r":::{.callout-([^}]*)}(.*?):::", + r":::\1\2:::", + cell.source, + flags=re.DOTALL, + ) + return cell, resources + + +class ExtractAttachmentsPreprocessor(Preprocessor): + """ + Inlines each cell's attachments into its markdown source as base64 data + URIs, replacing image references to those attachments. + """ + + def preprocess_cell(self, cell, resources, cell_index): + """ + Apply a transformation on each cell. + Parameters + ---------- + cell : NotebookNode cell + Notebook cell being processed + resources : dictionary + Additional resources used in the conversion process. Allows + preprocessors to pass variables into the Jinja engine. 
+ cell_index : int + Index of the cell being processed (see base.py) + """ + + # Get files directory if it has been specified + + # Make sure outputs key exists + if not isinstance(resources["outputs"], dict): + resources["outputs"] = {} + + # Loop through all of the attachments in the cell + for name, attach in cell.get("attachments", {}).items(): + for mime, data in attach.items(): + if mime not in { + "image/png", + "image/jpeg", + "image/svg+xml", + "application/pdf", + }: + continue + + # attachments are pre-rendered. Only replace markdown-formatted + # images with the following logic + attach_str = f"({name})" + if attach_str in cell.source: + data = f"(data:{mime};base64,{data})" + cell.source = cell.source.replace(attach_str, data) + + return cell, resources + + +exporter = MarkdownExporter( + preprocessors=[ + EscapePreprocessor, + ExtractAttachmentsPreprocessor, + RegexRemovePreprocessor(patterns=[r"^\s*$"]), + ], + template_name="mdoutput", + extra_template_basedirs=["./scripts/notebook_convert_templates"], +) + + +def _process_path(tup: Tuple[Path, Path, Path]): + notebook_path, intermediate_docs_dir, output_docs_dir = tup + relative = notebook_path.relative_to(intermediate_docs_dir) + output_path = output_docs_dir / relative.parent / (relative.stem + ".md") + _convert_notebook(notebook_path, output_path) + + +def _convert_notebook(notebook_path: Path, output_path: Path): + with open(notebook_path) as f: + nb = nbformat.read(f, as_version=4) + + body, resources = exporter.from_notebook_node(nb) + + output_path.parent.mkdir(parents=True, exist_ok=True) + + with open(output_path, "w") as f: + f.write(body) + + return output_path + + +if __name__ == "__main__": + intermediate_docs_dir = Path(sys.argv[1]) + output_docs_dir = Path(sys.argv[2]) + + source_paths_arg = os.environ.get("SOURCE_PATHS") + source_paths: Iterable[Path] + if source_paths_arg: + source_path_strs = re.split(r"\s+", source_paths_arg) + source_paths_stripped = [p.strip() for p in source_path_strs] + source_paths = [intermediate_docs_dir / p for p in source_paths_stripped if p] + else: + source_paths = intermediate_docs_dir.glob("**/*.ipynb") + + with multiprocessing.Pool() as pool: + pool.map( + _process_path, + ( + (notebook_path, intermediate_docs_dir, output_docs_dir) + for notebook_path in source_paths + ), + ) diff --git a/docs/scripts/notebook_convert_templates/mdoutput/conf.json b/docs/scripts/notebook_convert_templates/mdoutput/conf.json new file mode 100644 index 0000000000..7adab7c92a --- /dev/null +++ b/docs/scripts/notebook_convert_templates/mdoutput/conf.json @@ -0,0 +1,5 @@ +{ + "mimetypes": { + "text/markdown": true + } +} \ No newline at end of file diff --git a/docs/scripts/notebook_convert_templates/mdoutput/index.md.j2 b/docs/scripts/notebook_convert_templates/mdoutput/index.md.j2 new file mode 100644 index 0000000000..00e1e03057 --- /dev/null +++ b/docs/scripts/notebook_convert_templates/mdoutput/index.md.j2 @@ -0,0 +1,33 @@ +{% extends 'markdown/index.md.j2' %} + +{%- block traceback_line -%} +```output +{{ line.rstrip() | strip_ansi }} +``` +{%- endblock traceback_line -%} + +{%- block stream -%} +```output +{{ output.text.rstrip() }} +``` +{%- endblock stream -%} + +{%- block data_text scoped -%} +```output +{{ output.data['text/plain'].rstrip() }} +``` +{%- endblock data_text -%} + +{%- block data_html scoped -%} +```html +{{ output.data['text/html'] | safe }} +``` +{%- endblock data_html -%} + +{%- block data_jpg scoped -%} +![](data:image/jpg;base64,{{ output.data['image/jpeg'] }}) +{%- 
endblock data_jpg -%} + +{%- block data_png scoped -%} +![](data:image/png;base64,{{ output.data['image/png'] }}) +{%- endblock data_png -%} diff --git a/docs/vercel_build.sh b/docs/vercel_build.sh index bfd9a3ed06..c848d83479 100755 --- a/docs/vercel_build.sh +++ b/docs/vercel_build.sh @@ -4,7 +4,7 @@ set -e make install-vercel-deps -QUARTO_CMD="./quarto-1.3.450/bin/quarto" make build +make build rm -rf docs -mv build/output/docs ./ \ No newline at end of file +mv build/output-new/docs ./ diff --git a/docs/vercel_requirements.txt b/docs/vercel_requirements.txt index 0c22e362e4..3aedb8c9e8 100644 --- a/docs/vercel_requirements.txt +++ b/docs/vercel_requirements.txt @@ -9,3 +9,4 @@ langchain-nvidia-ai-endpoints langchain-elasticsearch langchain-postgres urllib3==1.26.18 +nbconvert==7.16.4
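A minimal standalone sketch of the two rewrites the new `EscapePreprocessor` performs: unwrapping Quarto's `{=mdx}` fences and converting Pandoc-style `:::{.callout-*}` blocks into Docusaurus admonitions. The regexes are copied from `docs/scripts/notebook_convert.py` above; the sample cell text is invented for illustration.

```python
import re

# Hypothetical markdown cell written in the old Quarto-era syntax;
# not taken from any notebook in this diff.
cell_source = (
    "```{=mdx}\n"
    'import { ColumnContainer, Column } from "@theme/Columns";\n'
    "```\n"
    ":::{.callout-note}\nTool-calling is still in beta.\n:::"
)

# Same substitutions as EscapePreprocessor.preprocess_cell: drop the
# {=mdx} wrapper but keep its body, then turn :::{.callout-note} into :::note.
cell_source = re.sub(r"```{=mdx}\n(.*?)\n```", r"\1", cell_source, flags=re.DOTALL)
cell_source = re.sub(
    r":::{.callout-([^}]*)}(.*?):::", r":::\1\2:::", cell_source, flags=re.DOTALL
)

print(cell_source)
# import { ColumnContainer, Column } from "@theme/Columns";
# :::note
# Tool-calling is still in beta.
# :::
```

Since this runs over every markdown cell at build time, the notebook hunks above can drop their `{=mdx}` wrappers entirely, and `why.ipynb` can keep its `:::{.callout-tip}` block as-is.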
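Likewise, a small illustrative sketch (not part of the PR) of the path mapping `_process_path` applies when fanning notebooks out to markdown, using the directory names wired up in `docs/Makefile`:

```python
from pathlib import Path

# $(INTERMEDIATE_DIR) and $(OUTPUT_NEW_DOCS_DIR) from docs/Makefile;
# the notebook path below is hypothetical.
intermediate_docs_dir = Path("build/intermediate/docs")
output_docs_dir = Path("build/output-new/docs")
notebook_path = intermediate_docs_dir / "integrations/chat/anthropic.ipynb"

# Mirrors _process_path: keep the notebook's relative location,
# swap the .ipynb suffix for .md.
relative = notebook_path.relative_to(intermediate_docs_dir)
output_path = output_docs_dir / relative.parent / (relative.stem + ".md")
print(output_path)  # build/output-new/docs/integrations/chat/anthropic.md
```

`make render` converts every notebook under the intermediate dir; per the script's `__main__` block, setting `SOURCE_PATHS` to a whitespace-separated list of notebook paths (relative to the intermediate dir) restricts conversion to that subset.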