update based on PR feedback

Jeff Tang, 1 year ago
Parent commit 410d8cf486

+ 3 - 11
llama-demo-apps/BreakingNews.ipynb

@@ -82,25 +82,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "id": "c12fc2cb",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Init param `input` is deprecated, please use `model_kwargs` instead.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# set llm to be using Llama2 hosted on Replicate\n",
     "llama2_13b_chat = \"meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d\"\n",
     "\n",
     "llm = Replicate(\n",
     "    model=llama2_13b_chat,\n",
-    "    input={\"temperature\": 0.01, \"max_length\": 2000, \"top_p\": 1},\n",
+    "    model_kwargs={\"temperature\": 0.01, \"top_p\": 1, \"max_new_tokens\":500}\n",
     ")"
    ]
   },
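The change above is the heart of this commit: newer LangChain releases deprecate the Replicate wrapper's `input` parameter in favor of `model_kwargs`, which is exactly what the removed stderr output ("Init param `input` is deprecated, please use `model_kwargs` instead.") was warning about. A minimal before/after sketch, assuming `REPLICATE_API_TOKEN` is set in the environment:

```
from langchain.llms import Replicate

llama2_13b_chat = "meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d"

# Old style - triggers "Init param `input` is deprecated, please use `model_kwargs` instead.":
# llm = Replicate(model=llama2_13b_chat, input={"temperature": 0.01, "max_length": 2000, "top_p": 1})

# New style, as used throughout this commit; note the diff also swaps
# `max_length` for `max_new_tokens`.
llm = Replicate(
    model=llama2_13b_chat,
    model_kwargs={"temperature": 0.01, "top_p": 1, "max_new_tokens": 500},
)
print(llm("What is the capital of France?"))
```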

+ 12 - 15
llama-demo-apps/HelloLlamaCloud.ipynb

@@ -20,7 +20,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!pip install langchain replicate sentence-transformers"
+    "!pip install langchain replicate sentence-transformers chromadb"
    ]
   },
   {
@@ -47,25 +47,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "id": "ad536adb",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Init param `input` is deprecated, please use `model_kwargs` instead.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from langchain.llms import Replicate\n",
     "\n",
     "llama2_13b = \"meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d\"\n",
     "llm = Replicate(\n",
     "    model=llama2_13b,\n",
-    "    input={\"temperature\": 0.01, \"max_length\": 500, \"top_p\": 1},\n",
+    "    model_kwargs={\"temperature\": 0.01, \"top_p\": 1, \"max_new_tokens\":500}\n",
     ")"
    ]
   },
@@ -220,7 +212,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# there're more 30 vector stores (DBs) supported by LangChain. Chroma is light-weight and in memory so it's easy to get started with\n",
+    "# there're more than 30 vector stores (DBs) supported by LangChain. Chroma is light-weight and in memory so it's easy to get started with\n",
     "# other vector stores can be used to store large amount of data - see https://python.langchain.com/docs/integrations/vectorstores\n",
     "from langchain.vectorstores import Chroma\n",
     "\n",
@@ -238,7 +230,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# split the loaded documents into chunks \n",
+    "# split the loaded documents into chunks. \n",
+    "# in genreral, use larger chuck sizes for highly structured text such as code and smaller size for \n",
+    "# less structured text. you may need to experiment with different chunk sizes and overlap values to find out the best numbers.\n",
     "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)\n",
     "all_splits = text_splitter.split_documents(docs)\n",
     "\n",
@@ -387,7 +381,10 @@
     "chat_history.append((followup, followup_answer[\"answer\"]))\n",
     "more_followup = \"what tasks can it assist with?\"\n",
     "more_followup_answer = chat_chain({\"question\": more_followup, \"chat_history\": chat_history})\n",
-    "print(more_followup_answer['answer'])"
+    "print(more_followup_answer['answer'])\n",
+    "\n",
+    "# results get cut off - you may set \"max_new_tokens\" in the Replicate call above to a larger number (like 1000 below) to avoid the cut off\n",
+    "#    model_kwargs={\"temperature\": 0.01, \"top_p\": 1, \"max_new_tokens\": 1000}"
    ]
   }
  ],
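The follow-up pattern in the last hunk, condensed into a hedged sketch; `llm` and `retriever` are assumed from earlier cells, and `ConversationalRetrievalChain` is an assumption that matches the `chat_chain({"question": ..., "chat_history": ...})` call signature (the first two questions are placeholders):

```
from langchain.chains import ConversationalRetrievalChain

# llm and retriever (e.g. vectordb.as_retriever()) are assumed from earlier cells.
chat_chain = ConversationalRetrievalChain.from_llm(llm, retriever)

chat_history = []
for question in ["What is llama-recipes?", "How do I use it?", "what tasks can it assist with?"]:
    result = chat_chain({"question": question, "chat_history": chat_history})
    chat_history.append((question, result["answer"]))  # feed prior turns back in
    print(result["answer"])
```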

+ 1 - 1
llama-demo-apps/Llama2_Gradio.ipynb

@@ -55,7 +55,7 @@
     "\n",
     "llm = Replicate(\n",
     "    model=llama2_13b_chat,\n",
-    "    input={\"temperature\": 0.01, \"max_length\": 2000, \"top_p\": 1},\n",
+    "    model_kwargs={\"temperature\": 0.01, \"top_p\": 1, \"max_new_tokens\":500}\n",
     ")\n",
     "\n",
     "\n",

+ 18 - 3
llama-demo-apps/README.md

@@ -1,8 +1,8 @@
 # Llama2 Demo Apps 
 
-This folder showcases the Llama2-powered apps. If you need a general understanding of GenAI, Llama2, prompt engineering and RAG, be sure to first check the [Getting to know Llama 2 notebook](https://github.com/facebookresearch/llama-recipes/blob/main/examples/Getting_to_know_Llama.ipynb) and its Meta Connect video [here](https://www.facebook.com/watch/?v=662153709222699).
+This folder showcases Llama2-powered demo apps. If you need a general understanding of GenAI, Llama2, prompt engineering and RAG, be sure to first check the [Getting to know Llama 2 notebook](https://github.com/facebookresearch/llama-recipes/blob/main/examples/Getting_to_know_Llama.ipynb) and its Meta Connect video [here](https://www.facebook.com/watch/?v=662153709222699).
 
-Here we start with three quickstart demos showing how to run Llama2 locally on a Mac, remotely in the cloud, and on a Google Colab to ask Llama2 general questions or questions about unstructured data not trained for the model.
+We start with three quickstart demos showing how to run Llama2 locally on a Mac, remotely in the cloud, and on a Google Colab, to ask Llama2 general questions or questions about unstructured data the model was not trained on.
 
 We then show three demos that ask Llama2 to summarize a YouTube video, to answer questions about structured data stored in a database, and to answer questions about live search results.
 
@@ -10,6 +10,21 @@ We also show how to build quick web UI for Llama2 demo apps using Streamlit and
 
 More advanced Llama2 demo apps will be coming soon.
 
+## Setting Up the Environment
+
+The quickest way to try the notebook demo apps on your local machine is to create a Conda environment and launch Jupyter as follows:
+```
+conda create -n llama-demo-apps python=3.8
+conda activate llama-demo-apps
+pip install jupyter
+cd <your_work_folder>
+git clone https://github.com/facebookresearch/llama-recipes
+cd llama-recipes/llama-demo-apps
+jupyter notebook
+```
+
+You can also upload the notebooks to Google Colab.
+
 ## HelloLlama - Quickstart in Running Llama2 (Almost) Everywhere*
 
 The first three demo apps show:
@@ -19,7 +34,7 @@ The first three demo apps show:
 * how to ask follow-up questions to Llama by sending previous questions and answers as context along with the new question, thus carrying out a multi-turn chat or conversation with Llama.
 
 ### [Running Llama2 Locally on Mac](HelloLlamaLocal.ipynb)
-To run Llama2 locally on Mac using [llama-cpp-python](https://github.com/abetlen/llama-cpp-python), first open the notebook `HelloLlamaLocal`. Then replace `<path-to-ggml-model-q4_0.gguf>` in the notebook `HelloLlamaLocal` with the path either to your downloaded quantized model file [here](https://drive.google.com/file/d/1afPv3HOy73BE2MoYCgYJvBDeQNa9rZbj/view?usp=sharing), or to the `ggml-model-q4_0.gguf` file built with the following commands:
+To run Llama2 locally on Mac using [llama-cpp-python](https://github.com/abetlen/llama-cpp-python), first open the notebook `HelloLlamaLocal`. Then replace `<path-to-ggml-model-q4_0.gguf>` in the notebook with the path either to your downloaded quantized model file [here](https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_0.gguf), or to the `ggml-model-q4_0.gguf` file built with the following commands:
 ```
 git clone https://github.com/ggerganov/llama.cpp
 cd llama.cpp

+ 3 - 15
llama-demo-apps/StructuredLlama.ipynb

@@ -57,18 +57,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "id": "9dcd744c",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Init param `input` is deprecated, please use `model_kwargs` instead.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "llama2_13b_chat = \"meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d\"\n",
     "\n",
@@ -76,7 +68,7 @@
     "# \"Sure! Here's the SQL query for the given input question: \" before the SQL query; otherwise custom parsing will be needed.\n",
     "llm = Replicate(\n",
     "    model=llama2_13b_chat,\n",
-    "    input={\"temperature\": 0.01, \"max_length\": 500, \"top_p\": 1, \"system_prompt\": \"Given an input question, convert it to a SQL query. No pre-amble.\"},\n",
+    "    model_kwargs={\"temperature\": 0.01, \"top_p\": 1, \"max_new_tokens\":500, \"system_prompt\": \"Given an input question, convert it to a SQL query. No pre-amble.\"},\n",
     ")"
    ]
   },
@@ -89,10 +81,6 @@
    "source": [
     "db = SQLDatabase.from_uri(\"sqlite:///nba_roster.db\", sample_rows_in_table_info= 0)\n",
     "\n",
-    "# use the default sqlite prompt defined in \n",
-    "# https://github.com/langchain-ai/langchain/blob/33eb5f8300cd21c91a2f8d10c62197637931fa0a/libs/langchain/langchain/chains/sql_database/prompt.py#L211\n",
-    "# db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True)\n",
-    "\n",
     "# customize the default sqlite prompt defined in the link above\n",
     "PROMPT_SUFFIX = \"\"\"\n",
     "Only use the following tables:\n",

+ 4 - 12
llama-demo-apps/VideoSummary.ipynb

@@ -8,7 +8,7 @@
     "## This demo app shows:\n",
     "* how to use LangChain's YoutubeLoader to retrieve the caption in a YouTube video;\n",
     "* how to ask Llama to summarize the content (per the Llama's input size limit) of the video in a naive way using LangChain's stuff method;\n",
-    "* how to bypass the limit of Llama's max input token size by using more sophisticated way using LangChain's map_reduce and refine methods."
+    "* how to bypass the limit of Llama's max input token size by using more sophisticated way using LangChain's map_reduce and refine methods - see [here](https://python.langchain.com/docs/use_cases/summarization) for more info."
    ]
   },
   {
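For the map_reduce and refine methods referenced above, a hedged sketch using LangChain's standard `load_summarize_chain` entry point; the video URL is a placeholder and the chunk size is illustrative:

```
from langchain.llms import Replicate
from langchain.document_loaders import YoutubeLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain

# Placeholder URL; any video with captions works.
docs = YoutubeLoader.from_youtube_url("https://www.youtube.com/watch?v=<video-id>").load()
split_docs = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=100).split_documents(docs)

llm = Replicate(
    model="meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d",
    model_kwargs={"temperature": 0.01, "top_p": 1, "max_new_tokens": 500},
)

# "map_reduce" summarizes each chunk independently, then combines the partial
# summaries; "refine" folds each chunk into a running summary instead.
chain = load_summarize_chain(llm, chain_type="map_reduce")
print(chain.run(split_docs))
```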
@@ -94,18 +94,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "id": "adf8cf3d",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Init param `input` is deprecated, please use `model_kwargs` instead.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# set llm to be Llama2-13b model; if you use local Llama, just set llm accordingly - see the HelloLlamaLocal notebook\n",
     "from langchain.llms import Replicate\n",
@@ -113,7 +105,7 @@
     "llama2_13b = \"meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d\"\n",
     "llm = Replicate(\n",
     "    model=llama2_13b,\n",
-    "    input={\"temperature\": 0.01, \"max_length\": 500, \"top_p\": 1},\n",
+    "    model_kwargs={\"temperature\": 0.01, \"top_p\": 1, \"max_new_tokens\":500}\n",
     ")"
    ]
   },

+ 2 - 2
llama-demo-apps/streamlit_llama2.py

@@ -12,11 +12,11 @@ def generate_response(input_text):
 
     llm = Replicate(
         model=llama2_13b_chat,
-        input={"temperature": 0.01, "max_length": 2000, "top_p": 1},
+        model_kwargs={"temperature": 0.01, "top_p": 1, "max_new_tokens": 500}
     )
     st.info(llm(input_text))
 
 with st.form("my_form"):
     text = st.text_area("Enter text:", "What is Generative AI?")
     submitted = st.form_submit_button("Submit")
-    generate_response(text)
+    generate_response(text)
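To try the Streamlit app locally: `pip install streamlit langchain replicate`, set `REPLICATE_API_TOKEN` in your environment, then run `streamlit run streamlit_llama2.py`.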