
updated examples to (1) use OctoAI embeddings, and (2) no longer list the decommissioned int4 models

Thierry Moreau · 1 year ago
commit bd23f8b8f1

+ 0 - 2
demo_apps/OctoAI_API_examples/HelloLlamaCloud.ipynb

@@ -61,11 +61,9 @@
     "\n",
     "At the time of writing this notebook the following Llama models are available on OctoAI:\n",
     "* llama-2-13b-chat-fp16\n",
-    "* llama-2-70b-chat-int4\n",
     "* llama-2-70b-chat-fp16\n",
     "* codellama-7b-instruct-fp16\n",
     "* codellama-13b-instruct-fp16\n",
-    "* codellama-34b-instruct-int4\n",
     "* codellama-34b-instruct-fp16\n",
     "* codellama-70b-instruct-fp16"
    ]

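The models that remain are all fp16 variants. For readers updating their own code, here is a minimal sketch of targeting one of them; the `OctoAIEndpoint` wrapper, the chat-completions URL, and the `model_kwargs` shape are assumptions based on the OctoAI LangChain integration of the time, not part of this diff:

```python
# minimal sketch (assumed API): call one of the fp16 models listed above
import os
from getpass import getpass

from langchain_community.llms.octoai_endpoint import OctoAIEndpoint

os.environ["OCTOAI_API_TOKEN"] = getpass()  # same token setup as the notebooks

llm = OctoAIEndpoint(
    endpoint_url="https://text.octoai.run/v1/chat/completions",  # assumed chat URL
    model_kwargs={
        "model": "llama-2-13b-chat-fp16",  # any model from the list above
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."}
        ],
        "max_tokens": 256,
        "temperature": 0.01,
    },
)

print(llm("Who wrote the book The Innovator's Dilemma?"))
```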
+ 7 - 8
demo_apps/OctoAI_API_examples/LiveData.ipynb

@@ -104,11 +104,9 @@
     "\n",
     "At the time of writing this notebook the following Llama models are available on OctoAI:\n",
     "* llama-2-13b-chat-fp16\n",
-    "* llama-2-70b-chat-int4\n",
     "* llama-2-70b-chat-fp16\n",
     "* codellama-7b-instruct-fp16\n",
     "* codellama-13b-instruct-fp16\n",
-    "* codellama-34b-instruct-int4\n",
     "* codellama-34b-instruct-fp16\n",
     "* codellama-70b-instruct-fp16"
    ]
@@ -217,10 +215,9 @@
    "source": [
     "With the data set up, we create a vector store for the data and a query engine for it.\n",
     "\n",
-    "For our embeddings we will use `HuggingFaceEmbeddings` whose default embedding model is sentence-transformers/all-mpnet-base-v2. This model provides a good balance between speed and performance.\n",
-    "To change the default model, call `HuggingFaceEmbeddings(model_name=<another_embedding_model>)`. \n",
+    "For our embeddings we will use `OctoAIEmbeddings` whose default embedding model is GTE-Large. This model provides a good balance between speed and performance.\n",
     "\n",
-    "For more info see https://huggingface.co/blog/mteb. "
+    "For more info see https://octoai.cloud/tools/text/embeddings?mode=demo&model=thenlper%2Fgte-large. "
    ]
   },
   {
@@ -230,12 +227,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# use HuggingFace embeddings \n",
-    "from langchain.embeddings.huggingface import HuggingFaceEmbeddings\n",
+    "# use OctoAI embeddings \n",
+    "from langchain_community.embeddings import OctoAIEmbeddings\n",
     "from llama_index.embeddings import LangchainEmbedding\n",
     "\n",
     "\n",
-    "embeddings = LangchainEmbedding(HuggingFaceEmbeddings())\n",
+    "embeddings = LangchainEmbedding(OctoAIEmbeddings(\n",
+    "    endpoint_url=\"https://text.octoai.run/v1/embeddings\"\n",
+    "))\n",
     "print(embeddings)\n",
     "\n",
     "# create a ServiceContext instance to use Llama2 and custom embeddings\n",

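Extracted from the hunk above, the new embedding setup reads as a self-contained cell like this (the token handling is borrowed from the VideoSummary diff below):

```python
# self-contained version of the OctoAI embedding setup in the hunk above
import os
from getpass import getpass

from langchain_community.embeddings import OctoAIEmbeddings
from llama_index.embeddings import LangchainEmbedding

os.environ["OCTOAI_API_TOKEN"] = getpass()

# wrap the LangChain embeddings object so llama_index can consume it;
# the default model behind this endpoint is GTE-Large
embeddings = LangchainEmbedding(OctoAIEmbeddings(
    endpoint_url="https://text.octoai.run/v1/embeddings"
))
print(embeddings)

# a ServiceContext instance using Llama2 plus these custom embeddings
# follows in the notebook, outside this hunk
```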
File diff suppressed because it is too large
+ 9 - 42
demo_apps/OctoAI_API_examples/RAG_Chatbot_example/RAG_Chatbot_Example.ipynb


BIN
demo_apps/OctoAI_API_examples/RAG_Chatbot_example/vectorstore/db_faiss/index.faiss


BIN
demo_apps/OctoAI_API_examples/RAG_Chatbot_example/vectorstore/db_faiss/index.pkl


+ 1 - 3
demo_apps/OctoAI_API_examples/VideoSummary.ipynb

@@ -110,7 +110,7 @@
     "import os\n",
     "\n",
     "OCTOAI_API_TOKEN = getpass()\n",
-    "os.environ[\"OCTOAI_API_TOKEN\"] = OCTOAI_API_TOKEN\n"
+    "os.environ[\"OCTOAI_API_TOKEN\"] = OCTOAI_API_TOKEN"
    ]
   },
   {
@@ -122,11 +122,9 @@
     "\n",
     "At the time of writing this notebook the following Llama models are available on OctoAI:\n",
     "* llama-2-13b-chat-fp16\n",
-    "* llama-2-70b-chat-int4\n",
     "* llama-2-70b-chat-fp16\n",
     "* codellama-7b-instruct-fp16\n",
     "* codellama-13b-instruct-fp16\n",
-    "* codellama-34b-instruct-int4\n",
     "* codellama-34b-instruct-fp16\n",
     "* codellama-70b-instruct-fp16\n",
     "\n",