Since we are using OctoAI in this example, you'll need to obtain an OctoAI token:
Note: After the free trial ends, you will need to enter billing information to continue to use Llama 2 hosted on OctoAI.
To run this example, execute all the cells below. Then, in the notebook output or in a browser at http://127.0.0.1:7860, you should see a chat UI where you can type a question and see the model's answer.
Let's start by installing the necessary packages:
And setting up the OctoAI token.
# Install the demo's dependencies: LangChain (LLM wrapper), the OctoAI SDK,
# and Gradio (chat UI). `!` shell magic — works only inside a notebook.
!pip install langchain octoai-sdk gradio
from getpass import getpass
import os
# Prompt interactively for the OctoAI API token (input hidden) and export it
# via the environment so the LangChain OctoAI client can pick it up.
OCTOAI_API_TOKEN = getpass()
os.environ["OCTOAI_API_TOKEN"] = OCTOAI_API_TOKEN
from langchain.schema import AIMessage, HumanMessage
import gradio as gr
from langchain.llms.octoai_endpoint import OctoAIEndpoint
# Model identifier as exposed by OctoAI's hosted text endpoint.
llama2_13b = "llama-2-13b-chat-fp16"

# LangChain wrapper around OctoAI's chat-completions endpoint. The kwargs
# below are forwarded with every request.
llm = OctoAIEndpoint(
    endpoint_url="https://text.octoai.run/v1/chat/completions",
    model_kwargs={
        "model": llama2_13b,
        # System message prepended to every request sent to the model.
        "messages": [
            {
                "role": "system",
                "content": "You are a helpful, respectful and honest assistant."
            }
        ],
        "max_tokens": 500,   # cap on tokens generated per reply
        "top_p": 1,          # no nucleus-sampling truncation
        "temperature": 0.01  # near-deterministic sampling
    },
)
def predict(message, history):
    """Gradio ChatInterface callback: answer `message` given the chat `history`.

    Parameters
    ----------
    message : str
        The user's latest input.
    history : list[tuple[str, str]]
        Prior (human, assistant) turns, as supplied by Gradio.

    Returns
    -------
    str
        The model's reply text.
    """
    # Bug fix: the original built a LangChain message list from `history` and
    # then discarded it, calling llm(message) — the bot had no memory.
    # OctoAIEndpoint is a text-completion wrapper that accepts a plain string
    # prompt (not a chat-message list), so we serialize the prior turns into
    # the prompt ourselves.
    turns = []
    for human, ai in history:
        turns.append(f"User: {human}")
        turns.append(f"Assistant: {ai}")
    turns.append(f"User: {message}")
    prompt = "\n".join(turns)
    return llm(prompt)
# Wrap the callback in a Gradio chat UI and serve it (per the intro above,
# reachable at http://127.0.0.1:7860 by default).
gr.ChatInterface(predict).launch()