|
|
|
@ -558,7 +558,7 @@
|
|
|
|
|
"id": "50b294c2-5048-4561-b610-2253c24fce62",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"## 8. Summary\n",
|
|
|
|
|
"## 8. Summary evaluators\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"We can use summary evaluators to compute summary metrics over a dataset.\n",
|
|
|
|
|
"\n",
|
|
|
|
@ -604,7 +604,13 @@
|
|
|
|
|
" ]\n",
|
|
|
|
|
")\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"retrieval_grader_oai = grade_prompt | structured_llm_grader"
|
|
|
|
|
"retrieval_grader_oai = grade_prompt | structured_llm_grader\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def predict_oai(inputs: dict) -> dict:\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" # Returns pydantic object\n",
|
|
|
|
|
" grade = retrieval_grader_oai.invoke({\"question\": inputs[\"question\"], \"document\": inputs[\"doc_txt\"]})\n",
|
|
|
|
|
" return {\"grade\":grade.score}"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
@ -634,7 +640,13 @@
|
|
|
|
|
" input_variables=[\"question\", \"document\"],\n",
|
|
|
|
|
")\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"retrieval_grader_mistral = prompt | llm | JsonOutputParser()"
|
|
|
|
|
"retrieval_grader_mistral = prompt | llm | JsonOutputParser()\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def predict_mistral(inputs: dict) -> dict:\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" # Returns JSON\n",
|
|
|
|
|
" grade = retrieval_grader_mistral.invoke({\"question\": inputs[\"question\"], \"document\": inputs[\"doc_txt\"]})\n",
|
|
|
|
|
" return {\"grade\":grade['score']}"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
@ -684,26 +696,6 @@
|
|
|
|
|
")"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 29,
|
|
|
|
|
"id": "becddb03-a738-41bd-a0e5-cd46c996893c",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"def predict_mistral(inputs: dict) -> dict:\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" # Returns JSON\n",
|
|
|
|
|
" grade = retrieval_grader_mistral.invoke({\"question\": inputs[\"question\"], \"document\": inputs[\"doc_txt\"]})\n",
|
|
|
|
|
" return {\"grade\":grade['score']}\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def predict_oai(inputs: dict) -> dict:\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" # Returns pydantic object\n",
|
|
|
|
|
" grade = retrieval_grader_oai.invoke({\"question\": inputs[\"question\"], \"document\": inputs[\"doc_txt\"]})\n",
|
|
|
|
|
" return {\"grade\":grade.score}"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 27,
|
|
|
|
@ -875,44 +867,12 @@
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"id": "91604245-abc1-4177-b517-fb5cc01ba017",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": []
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"id": "bf249e42-ce05-4502-992a-df0c57dc1573",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": []
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"id": "7d30d196-0c09-482b-921b-4c247913f46b",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": []
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"id": "ac68a2f2-7c20-4d9a-8dbe-964185713e4f",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": []
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"id": "486cd37c-a783-479e-b8b7-91aaec0660a6",
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "4189003c-0293-4edf-ba85-5ea438ce17b9",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": []
|
|
|
|
|
"source": [
|
|
|
|
|
"# TODO: Add "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"attachments": {
|
|
|
|
@ -924,7 +884,7 @@
|
|
|
|
|
"id": "48b6ce44-4d65-4ad3-b37f-dbbf8e4543c4",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"# 8. Trace tagging and feedback\n",
|
|
|
|
|
"# Trace tagging and feedback\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"Nice blog post on some best practices:\n",
|
|
|
|
|
"\n",
|
|
|
|
@ -1125,7 +1085,7 @@
|
|
|
|
|
"id": "8590debc-93a5-4d1f-9bde-53e91834aea9",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"# 9. Evaluating RAG\n",
|
|
|
|
|
"# Evaluating RAG\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"Nice blog post on some best practices:\n",
|
|
|
|
|
"\n",
|
|
|
|
@ -1346,7 +1306,7 @@
|
|
|
|
|
"id": "fde3aa77-6959-465b-bbc1-c9d1ddc6c9a5",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"# 10. Online unit tests \n",
|
|
|
|
|
"# Online unit tests\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"Nice blog post on some best practices:\n",
|
|
|
|
|
"\n",
|
|
|
|
|