
update based on PR feedback

Jeff Tang, 1 year ago
commit 410d8cf486

+ 3 - 11
llama-demo-apps/BreakingNews.ipynb

@@ -82,25 +82,17 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
   "id": "c12fc2cb",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Init param `input` is deprecated, please use `model_kwargs` instead.\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "# set llm to be using Llama2 hosted on Replicate\n",
    "llama2_13b_chat = \"meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d\"\n",
    "\n",
    "llm = Replicate(\n",
    "    model=llama2_13b_chat,\n",
-    "    input={\"temperature\": 0.01, \"max_length\": 2000, \"top_p\": 1},\n",
+    "    model_kwargs={\"temperature\": 0.01, \"top_p\": 1, \"max_new_tokens\":500}\n",
    ")"
   ]
  },
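The recurring change across these notebooks is LangChain's Replicate wrapper moving from the deprecated `input` parameter to `model_kwargs`, which is exactly what the removed stderr warning was about. A minimal before/after sketch, assuming the `langchain` and `replicate` packages the notebooks install:
```
from langchain.llms import Replicate

llama2_13b_chat = "meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d"

# deprecated style - triggers "Init param `input` is deprecated" on stderr:
# llm = Replicate(model=llama2_13b_chat, input={"temperature": 0.01, "max_length": 2000, "top_p": 1})

# current style: generation settings go through model_kwargs; note the switch
# from max_length to max_new_tokens, which caps the number of generated tokens
llm = Replicate(
    model=llama2_13b_chat,
    model_kwargs={"temperature": 0.01, "top_p": 1, "max_new_tokens": 500},
)
```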

+ 12 - 15
llama-demo-apps/HelloLlamaCloud.ipynb

@@ -20,7 +20,7 @@
    "metadata": {},
    "metadata": {},
    "outputs": [],
    "outputs": [],
    "source": [
    "source": [
-    "!pip install langchain replicate sentence-transformers"
+    "!pip install langchain replicate sentence-transformers chromadb"
    ]
    ]
   },
   },
   {
   {
@@ -47,25 +47,17 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
   "id": "ad536adb",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Init param `input` is deprecated, please use `model_kwargs` instead.\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "from langchain.llms import Replicate\n",
    "\n",
    "llama2_13b = \"meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d\"\n",
    "llm = Replicate(\n",
    "    model=llama2_13b,\n",
-    "    input={\"temperature\": 0.01, \"max_length\": 500, \"top_p\": 1},\n",
+    "    model_kwargs={\"temperature\": 0.01, \"top_p\": 1, \"max_new_tokens\":500}\n",
    ")"
   ]
  },
@@ -220,7 +212,7 @@
    "metadata": {},
    "metadata": {},
    "outputs": [],
    "outputs": [],
    "source": [
    "source": [
-    "# there're more 30 vector stores (DBs) supported by LangChain. Chroma is light-weight and in memory so it's easy to get started with\n",
+    "# there're more than 30 vector stores (DBs) supported by LangChain. Chroma is light-weight and in memory so it's easy to get started with\n",
     "# other vector stores can be used to store large amount of data - see https://python.langchain.com/docs/integrations/vectorstores\n",
     "# other vector stores can be used to store large amount of data - see https://python.langchain.com/docs/integrations/vectorstores\n",
     "from langchain.vectorstores import Chroma\n",
     "from langchain.vectorstores import Chroma\n",
     "\n",
     "\n",
@@ -238,7 +230,9 @@
    "metadata": {},
    "metadata": {},
    "outputs": [],
    "outputs": [],
    "source": [
    "source": [
-    "# split the loaded documents into chunks \n",
+    "# split the loaded documents into chunks. \n",
+    "# in genreral, use larger chuck sizes for highly structured text such as code and smaller size for \n",
+    "# less structured text. you may need to experiment with different chunk sizes and overlap values to find out the best numbers.\n",
     "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)\n",
     "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)\n",
     "all_splits = text_splitter.split_documents(docs)\n",
     "all_splits = text_splitter.split_documents(docs)\n",
     "\n",
     "\n",
@@ -387,7 +381,10 @@
     "chat_history.append((followup, followup_answer[\"answer\"]))\n",
     "chat_history.append((followup, followup_answer[\"answer\"]))\n",
     "more_followup = \"what tasks can it assist with?\"\n",
     "more_followup = \"what tasks can it assist with?\"\n",
     "more_followup_answer = chat_chain({\"question\": more_followup, \"chat_history\": chat_history})\n",
     "more_followup_answer = chat_chain({\"question\": more_followup, \"chat_history\": chat_history})\n",
-    "print(more_followup_answer['answer'])"
+    "print(more_followup_answer['answer'])\n",
+    "\n",
+    "# results get cut off - you may set \"max_new_tokens\" in the Replicate call above to a larger number (like 1000 below) to avoid the cut off\n",
+    "#    model_kwargs={\"temperature\": 0.01, \"top_p\": 1, \"max_new_tokens\": 1000}"
    ]
    ]
   }
   }
  ],
  ],
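The appended comment suggests raising `max_new_tokens` when answers get truncated. A sketch of how that fits the multi-turn chain this hunk exercises, assuming `chat_chain` is a ConversationalRetrievalChain over the Chroma index (the usual pattern for this notebook):
```
from langchain.chains import ConversationalRetrievalChain

# allow longer answers so multi-turn follow-ups don't get cut off
llm = Replicate(
    model=llama2_13b,
    model_kwargs={"temperature": 0.01, "top_p": 1, "max_new_tokens": 1000},
)
chat_chain = ConversationalRetrievalChain.from_llm(llm, vectordb.as_retriever())

# each turn sends the accumulated (question, answer) pairs back as context
chat_history = []
question = "What is llama2?"
answer = chat_chain({"question": question, "chat_history": chat_history})
chat_history.append((question, answer["answer"]))
```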

+ 1 - 1
llama-demo-apps/Llama2_Gradio.ipynb

@@ -55,7 +55,7 @@
     "\n",
     "\n",
     "llm = Replicate(\n",
     "llm = Replicate(\n",
     "    model=llama2_13b_chat,\n",
     "    model=llama2_13b_chat,\n",
-    "    input={\"temperature\": 0.01, \"max_length\": 2000, \"top_p\": 1},\n",
+    "    model_kwargs={\"temperature\": 0.01, \"top_p\": 1, \"max_new_tokens\":500}\n",
     ")\n",
     ")\n",
     "\n",
     "\n",
     "\n",
     "\n",

+ 18 - 3
llama-demo-apps/README.md

@@ -1,8 +1,8 @@
 # Llama2 Demo Apps 
 
-This folder showcases the Llama2-powered apps. If you need a general understanding of GenAI, Llama2, prompt engineering and RAG, be sure to first check the [Getting to know Llama 2 notebook](https://github.com/facebookresearch/llama-recipes/blob/main/examples/Getting_to_know_Llama.ipynb) and its Meta Connect video [here](https://www.facebook.com/watch/?v=662153709222699).
+This folder showcases Llama2-powered demo apps. If you need a general understanding of GenAI, Llama2, prompt engineering and RAG, be sure to first check the [Getting to know Llama 2 notebook](https://github.com/facebookresearch/llama-recipes/blob/main/examples/Getting_to_know_Llama.ipynb) and its Meta Connect video [here](https://www.facebook.com/watch/?v=662153709222699).
 
-Here we start with three quickstart demos showing how to run Llama2 locally on a Mac, remotely in the cloud, and on a Google Colab to ask Llama2 general questions or questions about unstructured data not trained for the model.
+We start with three quickstart demos showing how to run Llama2 locally on a Mac, remotely in the cloud, and on a Google Colab to ask Llama2 general questions or questions about unstructured data the model was not trained on.
 
 We then show three demos that ask Llama2 to summarize a YouTube video, to answer questions about structured data stored in a database, and to answer questions about live search results.
 
@@ -10,6 +10,21 @@ We also show how to build quick web UI for Llama2 demo apps using Streamlit and
 
 More advanced Llama2 demo apps will be coming soon.
 
+## Setting Up Environment
+
+The quickest way to test-run the notebook demo apps on your local machine is to create a Conda environment and start running the Jupyter notebook as follows:
+```
+conda create -n llama-demo-apps python=3.8
+conda activate llama-demo-apps
+pip install jupyter
+cd <your_work_folder>
+git clone https://github.com/facebookresearch/llama-recipes
+cd llama-recipes/llama-demo-apps
+jupyter notebook
+```
+
+You can also upload the notebooks to Google Colab.
+
 ## HelloLlama - Quickstart in Running Llama2 (Almost) Everywhere*
 
 The first three demo apps show:
@@ -19,7 +34,7 @@ The first three demo apps show:
 * how to ask follow up questions to Llama by sending previous questions and answers as the context along with the new question, hence performing multi-turn chat or conversation with Llama.
 
 ### [Running Llama2 Locally on Mac](HelloLlamaLocal.ipynb)
-To run Llama2 locally on Mac using [llama-cpp-python](https://github.com/abetlen/llama-cpp-python), first open the notebook `HelloLlamaLocal`. Then replace `<path-to-ggml-model-q4_0.gguf>` in the notebook `HelloLlamaLocal` with the path either to your downloaded quantized model file [here](https://drive.google.com/file/d/1afPv3HOy73BE2MoYCgYJvBDeQNa9rZbj/view?usp=sharing), or to the `ggml-model-q4_0.gguf` file built with the following commands:
+To run Llama2 locally on Mac using [llama-cpp-python](https://github.com/abetlen/llama-cpp-python), first open the notebook `HelloLlamaLocal`. Then replace `<path-to-ggml-model-q4_0.gguf>` in the notebook `HelloLlamaLocal` with the path either to your downloaded quantized model file [here](https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_0.gguf), or to the `ggml-model-q4_0.gguf` file built with the following commands:
 ```
 git clone https://github.com/ggerganov/llama.cpp
 cd llama.cpp

+ 3 - 15
llama-demo-apps/StructuredLlama.ipynb

@@ -57,18 +57,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
   "id": "9dcd744c",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Init param `input` is deprecated, please use `model_kwargs` instead.\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "llama2_13b_chat = \"meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d\"\n",
    "\n",
@@ -76,7 +68,7 @@
     "# \"Sure! Here's the SQL query for the given input question: \" before the SQL query; otherwise custom parsing will be needed.\n",
     "# \"Sure! Here's the SQL query for the given input question: \" before the SQL query; otherwise custom parsing will be needed.\n",
     "llm = Replicate(\n",
     "llm = Replicate(\n",
     "    model=llama2_13b_chat,\n",
     "    model=llama2_13b_chat,\n",
-    "    input={\"temperature\": 0.01, \"max_length\": 500, \"top_p\": 1, \"system_prompt\": \"Given an input question, convert it to a SQL query. No pre-amble.\"},\n",
+    "    model_kwargs={\"temperature\": 0.01, \"top_p\": 1, \"max_new_tokens\":500, \"system_prompt\": \"Given an input question, convert it to a SQL query. No pre-amble.\"},\n",
     ")"
     ")"
    ]
    ]
   },
   },
@@ -89,10 +81,6 @@
    "source": [
    "source": [
     "db = SQLDatabase.from_uri(\"sqlite:///nba_roster.db\", sample_rows_in_table_info= 0)\n",
     "db = SQLDatabase.from_uri(\"sqlite:///nba_roster.db\", sample_rows_in_table_info= 0)\n",
     "\n",
     "\n",
-    "# use the default sqlite prompt defined in \n",
-    "# https://github.com/langchain-ai/langchain/blob/33eb5f8300cd21c91a2f8d10c62197637931fa0a/libs/langchain/langchain/chains/sql_database/prompt.py#L211\n",
-    "# db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True)\n",
-    "\n",
     "# customize the default sqlite prompt defined in the link above\n",
     "# customize the default sqlite prompt defined in the link above\n",
     "PROMPT_SUFFIX = \"\"\"\n",
     "PROMPT_SUFFIX = \"\"\"\n",
     "Only use the following tables:\n",
     "Only use the following tables:\n",

+ 4 - 12
llama-demo-apps/VideoSummary.ipynb

@@ -8,7 +8,7 @@
     "## This demo app shows:\n",
     "## This demo app shows:\n",
     "* how to use LangChain's YoutubeLoader to retrieve the caption in a YouTube video;\n",
     "* how to use LangChain's YoutubeLoader to retrieve the caption in a YouTube video;\n",
     "* how to ask Llama to summarize the content (per the Llama's input size limit) of the video in a naive way using LangChain's stuff method;\n",
     "* how to ask Llama to summarize the content (per the Llama's input size limit) of the video in a naive way using LangChain's stuff method;\n",
-    "* how to bypass the limit of Llama's max input token size by using more sophisticated way using LangChain's map_reduce and refine methods."
+    "* how to bypass the limit of Llama's max input token size by using more sophisticated way using LangChain's map_reduce and refine methods - see [here](https://python.langchain.com/docs/use_cases/summarization) for more info."
    ]
    ]
   },
   },
   {
   {
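For the stuff vs. map_reduce/refine distinction referenced above, a short sketch of the LangChain API involved (`docs` stands for the documents loaded from the video caption):
```
from langchain.chains.summarize import load_summarize_chain

# "stuff" concatenates everything into one prompt, so it fails once the
# caption exceeds Llama's context window; "map_reduce" summarizes each
# chunk independently, then summarizes the summaries
chain = load_summarize_chain(llm, chain_type="map_reduce")
print(chain.run(docs))

# "refine" walks the chunks in order, updating a running summary as it goes
chain = load_summarize_chain(llm, chain_type="refine")
print(chain.run(docs))
```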
@@ -94,18 +94,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
   "id": "adf8cf3d",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Init param `input` is deprecated, please use `model_kwargs` instead.\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "# set llm to be Llama2-13b model; if you use local Llama, just set llm accordingly - see the HelloLlamaLocal notebook\n",
    "from langchain.llms import Replicate\n",
@@ -113,7 +105,7 @@
     "llama2_13b = \"meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d\"\n",
     "llama2_13b = \"meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d\"\n",
     "llm = Replicate(\n",
     "llm = Replicate(\n",
     "    model=llama2_13b,\n",
     "    model=llama2_13b,\n",
-    "    input={\"temperature\": 0.01, \"max_length\": 500, \"top_p\": 1},\n",
+    "    model_kwargs={\"temperature\": 0.01, \"top_p\": 1, \"max_new_tokens\":500}\n",
     ")"
     ")"
    ]
    ]
   },
   },

+ 2 - 2
llama-demo-apps/streamlit_llama2.py

@@ -12,11 +12,11 @@ def generate_response(input_text):
 
     llm = Replicate(
         model=llama2_13b_chat,
-        input={"temperature": 0.01, "max_length": 2000, "top_p": 1},
+        model_kwargs={"temperature": 0.01, "top_p": 1, "max_new_tokens":500}
     )
     st.info(llm(input_text))
 
 with st.form("my_form"):
     text = st.text_area("Enter text:", "What is Generative AI?")
     submitted = st.form_submit_button("Submit")
-    generate_response(text)
+    generate_response(text)
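One side note on this snippet: as written, `generate_response(text)` runs on every Streamlit rerun, not only on submit. A common pattern (a suggestion, not part of this commit) is to gate the call on the submit flag:
```
with st.form("my_form"):
    text = st.text_area("Enter text:", "What is Generative AI?")
    submitted = st.form_submit_button("Submit")
    if submitted:
        generate_response(text)
```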