|
@@ -241,10 +241,8 @@
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
- "cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
+ "cell_type": "markdown",
|
|
|
"metadata": {},
|
|
|
- "outputs": [],
|
|
|
"source": [
|
|
|
"model = meta-llama/Llama-2-7b-chat-hf \n",
|
|
|
"volume = $PWD/data \n",
|
|
@@ -265,7 +263,7 @@
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
- "!curl localhost:8080/generate -X POST -H 'Content-Type: application/json' -d '{\"inputs\": \"What is good about Beijing?\", \"parameters\": { \"max_new_tokens\":64}}' "
|
|
|
+ "!curl localhost:8080/generate -X POST -H 'Content-Type: application/json' -d '{\"inputs\": \"What is good about Beijing?\", \"parameters\": { \"max_new_tokens\":64}}' #Replace localhost with the IP visible to the machine running the notebook "
|
|
|
]
|
|
|
},
|
|
|
{
|
|
@@ -290,7 +288,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 52,
|
|
|
+ "execution_count": 9,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -312,7 +310,7 @@
|
|
|
"DB_FAISS_PATH = 'vectorstore/db_faiss'\n",
|
|
|
"\n",
|
|
|
"#Llama2 TGI models host port\n",
|
|
|
- "LLAMA2_7B_HOSTPORT = \"http://localhost:8080/\"\n",
|
|
|
+ "LLAMA2_7B_HOSTPORT = \"http://localhost:8080/\" #Replace localhost with the IP visible to the machine running the notebook\n",
|
|
|
"LLAMA2_13B_HOSTPORT = \"http://localhost:8080/\" #Add your own host ports for model switching. You can host another TGI model on same instance on a different port.\n",
|
|
|
"\n",
|
|
|
"\n",
|
|
@@ -333,7 +331,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 53,
|
|
|
+ "execution_count": null,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -351,7 +349,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 54,
|
|
|
+ "execution_count": 11,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -378,12 +376,12 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 81,
|
|
|
+ "execution_count": 12,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
"template = \"\"\"\n",
|
|
|
- "[INST]Use the following pieces of context to answer the question.\n",
|
|
|
+ "[INST]Use the following pieces of context to answer the question. If no context is provided, answer like an AI assistant.\n",
|
|
|
"{context}\n",
|
|
|
"Question: {question} [/INST]\n",
|
|
|
"\"\"\"\n",
|
|
@@ -402,7 +400,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 82,
|
|
|
+ "execution_count": 13,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -445,7 +443,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 84,
|
|
|
+ "execution_count": 14,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -472,7 +470,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 85,
|
|
|
+ "execution_count": 15,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -634,9 +632,39 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
- "metadata": {},
|
|
|
- "outputs": [],
|
|
|
+ "execution_count": 16,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "Running on local URL: http://0.0.0.0:7860\n",
|
|
|
+ "\n",
|
|
|
+ "To create a public link, set `share=True` in `launch()`.\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/html": [
|
|
|
+ "<div><iframe src=\"http://localhost:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
|
|
+ ],
|
|
|
+ "text/plain": [
|
|
|
+ "<IPython.core.display.HTML object>"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "display_data"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": []
|
|
|
+ },
|
|
|
+ "execution_count": 16,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
"source": [
|
|
|
"demo.queue().launch(server_name=\"0.0.0.0\")"
|
|
|
]
|
|
@@ -649,7 +677,6 @@
|
|
|
"Once launched, in the notebook or a browser with URL http://0.0.0.0:7860, you should see the UI. \n",
|
|
|
"Things to try in the chatbot demo: \n",
|
|
|
"* Asking specific questions related to the Llama 2 Getting Started Guide\n",
|
|
|
- "* Streaming\n",
|
|
|
"* Adjust parameters such as max new token generated\n",
|
|
|
"* Switching to another Llama model with another container launched in a separate terminal\n",
|
|
|
"\n",
|