@@ -20,7 +20,8 @@
"metadata": {},
"outputs": [],
"source": [
- "!pip install langchain replicate sentence-transformers"
+ "!pip install langchain replicate sentence-transformers chromadb"
]
},
{
@@ -47,25 +48,18 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"id": "ad536adb",
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Init param `input` is deprecated, please use `model_kwargs` instead.\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"from langchain.llms import Replicate\n",
"\n",
"llama2_13b = \"meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d\"\n",
"llm = Replicate(\n",
" model=llama2_13b,\n",
- " input={\"temperature\": 0.01, \"max_length\": 500, \"top_p\": 1},\n",
+ " model_kwargs={\"temperature\": 0.01, \"top_p\": 1, \"max_new_tokens\":500}\n",
")"
]
},
@@ -220,7 +214,8 @@
"metadata": {},
"outputs": [],
"source": [
- "# there're more 30 vector stores (DBs) supported by LangChain. Chroma is light-weight and in memory so it's easy to get started with\n",
+ "# there are more than 30 vector stores (DBs) supported by LangChain. Chroma is lightweight and in-memory, so it's easy to get started with\n",
"# other vector stores can be used to store large amount of data - see https://python.langchain.com/docs/integrations/vectorstores\n",
"from langchain.vectorstores import Chroma\n",
"\n",
@@ -238,7 +233,10 @@
"metadata": {},
"outputs": [],
"source": [
- "# split the loaded documents into chunks \n",
+ "# split the loaded documents into chunks.\n",
+ "# in general, use larger chunk sizes for highly structured text such as code and smaller sizes for\n",
+ "# less structured text. you may need to experiment with different chunk sizes and overlap values to find the best numbers.\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)\n",
"all_splits = text_splitter.split_documents(docs)\n",
"\n",
@@ -387,7 +385,11 @@
"chat_history.append((followup, followup_answer[\"answer\"]))\n",
"more_followup = \"what tasks can it assist with?\"\n",
"more_followup_answer = chat_chain({\"question\": more_followup, \"chat_history\": chat_history})\n",
- "print(more_followup_answer['answer'])"
+ "print(more_followup_answer['answer'])\n",
+ "\n",
+ "# if results get cut off, set \"max_new_tokens\" in the Replicate call above to a larger number (like 1000 below) to avoid the cutoff\n",
+ "# model_kwargs={\"temperature\": 0.01, \"top_p\": 1, \"max_new_tokens\": 1000}"
]
}
],