1 anno fa · ff2ed1b2c8
--- a/demo_apps/RAG_Chatbot_example/RAG_Chatbot_Example.ipynb
+++ b/demo_apps/RAG_Chatbot_example/RAG_Chatbot_Example.ipynb
@@ -241,10 +241,8 @@
 
				    ]
			
 
				   },
			
 
				   {
			
 
				-   "cell_type": "code",
			
 
				-   "execution_count": null,
			
 
				+   "cell_type": "markdown",
			
 
				    "metadata": {},
			
 
				-   "outputs": [],
			
 
				    "source": [
			
 
				     "model = meta-llama/Llama-2-7b-chat-hf  \n",
			
 
				     "volume = $PWD/data  \n",
			
@@ -265,7 +263,7 @@
 
				    "metadata": {},
			
 
				    "outputs": [],
			
 
				    "source": [
			
 
				-    "!curl localhost:8080/generate -X POST -H 'Content-Type: application/json' -d '{\"inputs\": \"What is good about Beijing?\", \"parameters\": { \"max_new_tokens\":64}}'     "
			
 
				+    "!curl localhost:8080/generate -X POST -H 'Content-Type: application/json' -d '{\"inputs\": \"What is good about Beijing?\", \"parameters\": { \"max_new_tokens\":64}}' #Replace localhost with the IP address visible to the machine running the notebook     "
			
 
				    ]
			
 
				   },
			
 
				   {
			
@@ -290,7 +288,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 52,
			
 
				+   "execution_count": 9,
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
 
				    "source": [
			
@@ -312,7 +310,7 @@
 
				     "DB_FAISS_PATH = 'vectorstore/db_faiss'\n",
			
 
				     "\n",
			
 
				     "#Llama2 TGI models host port\n",
			
 
				-    "LLAMA2_7B_HOSTPORT = \"http://localhost:8080/\"\n",
			
 
				+    "LLAMA2_7B_HOSTPORT = \"http://localhost:8080/\" #Replace localhost with the IP address visible to the machine running the notebook\n",
			
 
				     "LLAMA2_13B_HOSTPORT = \"http://localhost:8080/\" #Add your own host ports for model switching. You can host another TGI model on same instance on a different port.\n",
			
 
				     "\n",
			
 
				     "\n",
			
@@ -333,7 +331,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 53,
			
 
				+   "execution_count": null,
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
 
				    "source": [
			
@@ -351,7 +349,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 54,
			
 
				+   "execution_count": 11,
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
 
				    "source": [
			
@@ -378,12 +376,12 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 81,
			
 
				+   "execution_count": 12,
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
 
				    "source": [
			
 
				     "template = \"\"\"\n",
			
 
				-    "[INST]Use the following pieces of context to answer the question.\n",
			
 
				+    "[INST]Use the following pieces of context to answer the question. If no context provided, answer like a AI assistant.\n",
			
 
				     "{context}\n",
			
 
				     "Question: {question} [/INST]\n",
			
 
				     "\"\"\"\n",
			
@@ -402,7 +400,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 82,
			
 
				+   "execution_count": 13,
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
 
				    "source": [
			
@@ -445,7 +443,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 84,
			
 
				+   "execution_count": 14,
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
 
				    "source": [
			
@@ -472,7 +470,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 85,
			
 
				+   "execution_count": 15,
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
 
				    "source": [
			
@@ -634,9 +632,39 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": null,
			
 
				-   "metadata": {},
			
 
				-   "outputs": [],
			
 
				+   "execution_count": 16,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "Running on local URL:  http://0.0.0.0:7860\n",
			
 
				+      "\n",
			
 
				+      "To create a public link, set `share=True` in `launch()`.\n"
			
 
				+     ]
			
 
				+    },
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/html": [
			
 
				+       "<div><iframe src=\"http://localhost:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
			
 
				+      ],
			
 
				+      "text/plain": [
			
 
				+       "<IPython.core.display.HTML object>"
			
 
				+      ]
			
 
				+     },
			
 
				+     "metadata": {},
			
 
				+     "output_type": "display_data"
			
 
				+    },
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": []
			
 
				+     },
			
 
				+     "execution_count": 16,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				    "source": [
			
 
				     "demo.queue().launch(server_name=\"0.0.0.0\")"
			
 
				    ]
			
@@ -649,7 +677,6 @@
 
				     "Once launched, in the notebook or a browser with URL http://0.0.0.0:7860, you should see the UI.  \n",
			
 
				     "Things to try in the chatbot demo:  \n",
			
 
				     "* Asking specific questions related to the Llama 2 Getting Started Guide\n",
			
 
				-    "* Streaming\n",
			
 
				     "* Adjust parameters such as max new token generated\n",
			
 
				     "* Switching to another Llama model with another container launched in a separate terminal\n",
			
 
				     "\n",