
update based on PR feedback

Jeff Tang, 1 year ago
commit 410d8cf486

+ 3 - 11
llama-demo-apps/BreakingNews.ipynb

@@ -82,25 +82,17 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
   "id": "c12fc2cb",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Init param `input` is deprecated, please use `model_kwargs` instead.\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "# set llm to be using Llama2 hosted on Replicate\n",
    "llama2_13b_chat = \"meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d\"\n",
    "\n",
    "llm = Replicate(\n",
    "    model=llama2_13b_chat,\n",
-    "    input={\"temperature\": 0.01, \"max_length\": 2000, \"top_p\": 1},\n",
+    "    model_kwargs={\"temperature\": 0.01, \"top_p\": 1, \"max_new_tokens\":500}\n",
    ")"
   ]
  },
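The recurring change across these notebooks is LangChain's Replicate wrapper moving from the deprecated `input` parameter to `model_kwargs`, which is exactly what the removed stderr warning was about. A minimal before/after sketch, assuming the `langchain` and `replicate` packages the notebooks install:
```
from langchain.llms import Replicate

llama2_13b_chat = "meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d"

# deprecated style - triggers "Init param `input` is deprecated" on stderr:
# llm = Replicate(model=llama2_13b_chat, input={"temperature": 0.01, "max_length": 2000, "top_p": 1})

# current style: generation settings go through model_kwargs; note the switch
# from max_length to max_new_tokens, which caps the number of generated tokens
llm = Replicate(
    model=llama2_13b_chat,
    model_kwargs={"temperature": 0.01, "top_p": 1, "max_new_tokens": 500},
)
```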

+ 12 - 15
llama-demo-apps/HelloLlamaCloud.ipynb

@@ -20,7 +20,7 @@
    "metadata": {},
    "metadata": {},
    "outputs": [],
    "outputs": [],
    "source": [
    "source": [
-    "!pip install langchain replicate sentence-transformers"
+    "!pip install langchain replicate sentence-transformers chromadb"
    ]
    ]
   },
   },
   {
   {
@@ -47,25 +47,17 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
   "id": "ad536adb",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Init param `input` is deprecated, please use `model_kwargs` instead.\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "from langchain.llms import Replicate\n",
    "\n",
    "llama2_13b = \"meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d\"\n",
    "llm = Replicate(\n",
    "    model=llama2_13b,\n",
-    "    input={\"temperature\": 0.01, \"max_length\": 500, \"top_p\": 1},\n",
+    "    model_kwargs={\"temperature\": 0.01, \"top_p\": 1, \"max_new_tokens\":500}\n",
    ")"
   ]
  },
@@ -220,7 +212,7 @@
    "metadata": {},
    "metadata": {},
    "outputs": [],
    "outputs": [],
    "source": [
    "source": [
-    "# there're more 30 vector stores (DBs) supported by LangChain. Chroma is light-weight and in memory so it's easy to get started with\n",
+    "# there're more than 30 vector stores (DBs) supported by LangChain. Chroma is light-weight and in memory so it's easy to get started with\n",
     "# other vector stores can be used to store large amount of data - see https://python.langchain.com/docs/integrations/vectorstores\n",
     "# other vector stores can be used to store large amount of data - see https://python.langchain.com/docs/integrations/vectorstores\n",
     "from langchain.vectorstores import Chroma\n",
     "from langchain.vectorstores import Chroma\n",
     "\n",
     "\n",
@@ -238,7 +230,9 @@
    "metadata": {},
    "metadata": {},
    "outputs": [],
    "outputs": [],
    "source": [
    "source": [
-    "# split the loaded documents into chunks \n",
+    "# split the loaded documents into chunks. \n",
+    "# in genreral, use larger chuck sizes for highly structured text such as code and smaller size for \n",
+    "# less structured text. you may need to experiment with different chunk sizes and overlap values to find out the best numbers.\n",
     "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)\n",
     "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)\n",
     "all_splits = text_splitter.split_documents(docs)\n",
     "all_splits = text_splitter.split_documents(docs)\n",
     "\n",
     "\n",
@@ -387,7 +381,10 @@
     "chat_history.append((followup, followup_answer[\"answer\"]))\n",
     "chat_history.append((followup, followup_answer[\"answer\"]))\n",
     "more_followup = \"what tasks can it assist with?\"\n",
     "more_followup = \"what tasks can it assist with?\"\n",
     "more_followup_answer = chat_chain({\"question\": more_followup, \"chat_history\": chat_history})\n",
     "more_followup_answer = chat_chain({\"question\": more_followup, \"chat_history\": chat_history})\n",
-    "print(more_followup_answer['answer'])"
+    "print(more_followup_answer['answer'])\n",
+    "\n",
+    "# results get cut off - you may set \"max_new_tokens\" in the Replicate call above to a larger number (like 1000 below) to avoid the cut off\n",
+    "#    model_kwargs={\"temperature\": 0.01, \"top_p\": 1, \"max_new_tokens\": 1000}"
    ]
    ]
   }
   }
  ],
  ],
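The appended comment suggests raising `max_new_tokens` when answers get truncated. A sketch of how that fits the multi-turn chain this hunk exercises, assuming `chat_chain` is a ConversationalRetrievalChain over the Chroma index (the usual pattern for this notebook):
```
from langchain.chains import ConversationalRetrievalChain

# allow longer answers so multi-turn follow-ups don't get cut off
llm = Replicate(
    model=llama2_13b,
    model_kwargs={"temperature": 0.01, "top_p": 1, "max_new_tokens": 1000},
)
chat_chain = ConversationalRetrievalChain.from_llm(llm, vectordb.as_retriever())

# each turn sends the accumulated (question, answer) pairs back as context
chat_history = []
question = "What is llama2?"
answer = chat_chain({"question": question, "chat_history": chat_history})
chat_history.append((question, answer["answer"]))
```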

+ 1 - 1
llama-demo-apps/Llama2_Gradio.ipynb

@@ -55,7 +55,7 @@
     "\n",
     "\n",
     "llm = Replicate(\n",
     "llm = Replicate(\n",
     "    model=llama2_13b_chat,\n",
     "    model=llama2_13b_chat,\n",
-    "    input={\"temperature\": 0.01, \"max_length\": 2000, \"top_p\": 1},\n",
+    "    model_kwargs={\"temperature\": 0.01, \"top_p\": 1, \"max_new_tokens\":500}\n",
     ")\n",
     ")\n",
     "\n",
     "\n",
     "\n",
     "\n",

+ 18 - 3
llama-demo-apps/README.md

@@ -1,8 +1,8 @@
 # Llama2 Demo Apps 
 
-This folder showcases the Llama2-powered apps. If you need a general understanding of GenAI, Llama2, prompt engineering and RAG, be sure to first check the [Getting to know Llama 2 notebook](https://github.com/facebookresearch/llama-recipes/blob/main/examples/Getting_to_know_Llama.ipynb) and its Meta Connect video [here](https://www.facebook.com/watch/?v=662153709222699).
+This folder showcases Llama2-powered demo apps. If you need a general understanding of GenAI, Llama2, prompt engineering and RAG, be sure to first check the [Getting to know Llama 2 notebook](https://github.com/facebookresearch/llama-recipes/blob/main/examples/Getting_to_know_Llama.ipynb) and its Meta Connect video [here](https://www.facebook.com/watch/?v=662153709222699).
 
-Here we start with three quickstart demos showing how to run Llama2 locally on a Mac, remotely in the cloud, and on a Google Colab to ask Llama2 general questions or questions about unstructured data not trained for the model.
+We start with three quickstart demos showing how to run Llama2 locally on a Mac, remotely in the cloud, and on a Google Colab to ask Llama2 general questions or questions about unstructured data the model was not trained on.
 
 We then show three demos that ask Llama2 to summarize a YouTube video, to answer questions about structured data stored in a database, and to answer questions about live search results.
 
@@ -10,6 +10,21 @@ We also show how to build quick web UI for Llama2 demo apps using Streamlit and
 
 More advanced Llama2 demo apps will be coming soon.
 
+## Setting Up Environment
+
+The quickest way to test-run the notebook demo apps on your local machine is to create a Conda environment and start running the Jupyter notebook as follows:
+```
+conda create -n llama-demo-apps python=3.8
+conda activate llama-demo-apps
+pip install jupyter
+cd <your_work_folder>
+git clone https://github.com/facebookresearch/llama-recipes
+cd llama-recipes/llama-demo-apps
+jupyter notebook
+```
+
+You can also upload the notebooks to Google Colab.
+
 ## HelloLlama - Quickstart in Running Llama2 (Almost) Everywhere*
 
 The first three demo apps show:
@@ -19,7 +34,7 @@ The first three demo apps show:
 * how to ask follow up questions to Llama by sending previous questions and answers as the context along with the new question, hence performing multi-turn chat or conversation with Llama.
 
 ### [Running Llama2 Locally on Mac](HelloLlamaLocal.ipynb)
-To run Llama2 locally on Mac using [llama-cpp-python](https://github.com/abetlen/llama-cpp-python), first open the notebook `HelloLlamaLocal`. Then replace `<path-to-ggml-model-q4_0.gguf>` in the notebook `HelloLlamaLocal` with the path either to your downloaded quantized model file [here](https://drive.google.com/file/d/1afPv3HOy73BE2MoYCgYJvBDeQNa9rZbj/view?usp=sharing), or to the `ggml-model-q4_0.gguf` file built with the following commands:
+To run Llama2 locally on Mac using [llama-cpp-python](https://github.com/abetlen/llama-cpp-python), first open the notebook `HelloLlamaLocal`. Then replace `<path-to-ggml-model-q4_0.gguf>` in the notebook `HelloLlamaLocal` with the path either to your downloaded quantized model file [here](https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_0.gguf), or to the `ggml-model-q4_0.gguf` file built with the following commands:
 ```
 git clone https://github.com/ggerganov/llama.cpp
 cd llama.cpp

+ 3 - 15
llama-demo-apps/StructuredLlama.ipynb

@@ -57,18 +57,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
   "id": "9dcd744c",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Init param `input` is deprecated, please use `model_kwargs` instead.\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "llama2_13b_chat = \"meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d\"\n",
    "\n",
@@ -76,7 +68,7 @@
     "# \"Sure! Here's the SQL query for the given input question: \" before the SQL query; otherwise custom parsing will be needed.\n",
     "# \"Sure! Here's the SQL query for the given input question: \" before the SQL query; otherwise custom parsing will be needed.\n",
     "llm = Replicate(\n",
     "llm = Replicate(\n",
     "    model=llama2_13b_chat,\n",
     "    model=llama2_13b_chat,\n",
-    "    input={\"temperature\": 0.01, \"max_length\": 500, \"top_p\": 1, \"system_prompt\": \"Given an input question, convert it to a SQL query. No pre-amble.\"},\n",
+    "    model_kwargs={\"temperature\": 0.01, \"top_p\": 1, \"max_new_tokens\":500, \"system_prompt\": \"Given an input question, convert it to a SQL query. No pre-amble.\"},\n",
     ")"
     ")"
    ]
    ]
   },
   },
@@ -89,10 +81,6 @@
    "source": [
    "source": [
     "db = SQLDatabase.from_uri(\"sqlite:///nba_roster.db\", sample_rows_in_table_info= 0)\n",
     "db = SQLDatabase.from_uri(\"sqlite:///nba_roster.db\", sample_rows_in_table_info= 0)\n",
     "\n",
     "\n",
-    "# use the default sqlite prompt defined in \n",
-    "# https://github.com/langchain-ai/langchain/blob/33eb5f8300cd21c91a2f8d10c62197637931fa0a/libs/langchain/langchain/chains/sql_database/prompt.py#L211\n",
-    "# db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True)\n",
-    "\n",
     "# customize the default sqlite prompt defined in the link above\n",
     "# customize the default sqlite prompt defined in the link above\n",
     "PROMPT_SUFFIX = \"\"\"\n",
     "PROMPT_SUFFIX = \"\"\"\n",
     "Only use the following tables:\n",
     "Only use the following tables:\n",

+ 4 - 12
llama-demo-apps/VideoSummary.ipynb

@@ -8,7 +8,7 @@
     "## This demo app shows:\n",
     "## This demo app shows:\n",
     "* how to use LangChain's YoutubeLoader to retrieve the caption in a YouTube video;\n",
     "* how to use LangChain's YoutubeLoader to retrieve the caption in a YouTube video;\n",
     "* how to ask Llama to summarize the content (per the Llama's input size limit) of the video in a naive way using LangChain's stuff method;\n",
     "* how to ask Llama to summarize the content (per the Llama's input size limit) of the video in a naive way using LangChain's stuff method;\n",
-    "* how to bypass the limit of Llama's max input token size by using more sophisticated way using LangChain's map_reduce and refine methods."
+    "* how to bypass the limit of Llama's max input token size by using more sophisticated way using LangChain's map_reduce and refine methods - see [here](https://python.langchain.com/docs/use_cases/summarization) for more info."
    ]
    ]
   },
   },
   {
   {
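For the stuff vs. map_reduce/refine distinction referenced above, a short sketch of the LangChain API involved (`docs` stands for the documents loaded from the video caption):
```
from langchain.chains.summarize import load_summarize_chain

# "stuff" concatenates everything into one prompt, so it fails once the
# caption exceeds Llama's context window; "map_reduce" summarizes each
# chunk independently, then summarizes the summaries
chain = load_summarize_chain(llm, chain_type="map_reduce")
print(chain.run(docs))

# "refine" walks the chunks in order, updating a running summary as it goes
chain = load_summarize_chain(llm, chain_type="refine")
print(chain.run(docs))
```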
@@ -94,18 +94,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
   "id": "adf8cf3d",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Init param `input` is deprecated, please use `model_kwargs` instead.\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "# set llm to be Llama2-13b model; if you use local Llama, just set llm accordingly - see the HelloLlamaLocal notebook\n",
    "from langchain.llms import Replicate\n",
@@ -113,7 +105,7 @@
     "llama2_13b = \"meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d\"\n",
     "llama2_13b = \"meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d\"\n",
     "llm = Replicate(\n",
     "llm = Replicate(\n",
     "    model=llama2_13b,\n",
     "    model=llama2_13b,\n",
-    "    input={\"temperature\": 0.01, \"max_length\": 500, \"top_p\": 1},\n",
+    "    model_kwargs={\"temperature\": 0.01, \"top_p\": 1, \"max_new_tokens\":500}\n",
     ")"
     ")"
    ]
    ]
   },
   },

+ 2 - 2
llama-demo-apps/streamlit_llama2.py

@@ -12,11 +12,11 @@ def generate_response(input_text):
 
     llm = Replicate(
         model=llama2_13b_chat,
-        input={"temperature": 0.01, "max_length": 2000, "top_p": 1},
+        model_kwargs={"temperature": 0.01, "top_p": 1, "max_new_tokens":500}
     )
     st.info(llm(input_text))
 
 with st.form("my_form"):
     text = st.text_area("Enter text:", "What is Generative AI?")
     submitted = st.form_submit_button("Submit")
-    generate_response(text)
+    generate_response(text)
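One side note on this snippet: as written, `generate_response(text)` runs on every Streamlit rerun, not only on submit. A common pattern (a suggestion, not part of this commit) is to gate the call on the submit flag:
```
with st.form("my_form"):
    text = st.text_area("Enter text:", "What is Generative AI?")
    submitted = st.form_submit_button("Submit")
    if submitted:
        generate_response(text)
```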