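# Gradio chat demo for the TinyDolphin model, served by a local Ollama instance
# that this script downloads and starts itself (e.g. inside a Hugging Face Space).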
import subprocess
# from transformers import pipeline  # only needed for the predict_t() alternative below
import ollama
import gradio
import os
import threading
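# Fetch the Ollama binary on first run and make it executable.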
OLLAMA = os.path.expanduser("~/ollama")
if not os.path.exists(OLLAMA):
    subprocess.run("curl -L https://ollama.com/download/ollama-linux-amd64 -o ~/ollama", shell=True)
    os.chmod(OLLAMA, 0o755)
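# Chat history as (user, assistant) pairs, the format gradio.Chatbot expects.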
history = []
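# Run the Ollama server in a background thread so it stays up alongside the Gradio app.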
def ollama_service_thread():
    subprocess.run("~/ollama serve", shell=True)

OLLAMA_SERVICE_THREAD = threading.Thread(target=ollama_service_thread)
OLLAMA_SERVICE_THREAD.start()
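# Re-serialise the stored history into the chat-message format the model APIs expect.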
def get_history_messages():
    messages = []
    for user, assist in history:
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": assist})
    return messages
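# Stream a reply from the local Ollama server, yielding the growing history so the
# chatbot updates as chunks arrive. Assumes the "tinydolphin" model has already been
# pulled into the local Ollama store.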
def predict(prompt):
    response = ollama.chat(
        model="tinydolphin",
        messages=[
            *get_history_messages(),
            {"role": "user", "content": prompt},
        ],
        stream=True,
    )
    history.append((prompt, ""))
    message = ""
    for chunk in response:
        message += chunk["message"]["content"]
        history[-1] = (prompt, message)
        yield "", history
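# Unused alternative: run TinyDolphin through the transformers "conversational"
# pipeline instead of Ollama. Requires uncommenting the transformers import above,
# and is not wired up to the UI below.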
def predict_t(prompt):
    print("Predict:", prompt)
    print("Loading model")
    pipe = pipeline("conversational", model="cognitivecomputations/TinyDolphin-2.8-1.1b")
    print("Running pipeline")
    response = pipe(
        [
            *get_history_messages(),
            {"role": "user", "content": prompt},
        ],
    )
    history.append((prompt, response.messages[-1]["content"]))
    print("Predict done")
    return "", history
with gradio.Blocks(fill_height=True) as demo:
    chat = gradio.Chatbot(scale=1)
    with gradio.Row(variant="compact"):
        prompt = gradio.Textbox(show_label=False, scale=6, autofocus=True)
        button = gradio.Button(scale=1)
    # Send on button click or on Enter in the textbox.
    for handler in [button.click, prompt.submit]:
        handler(predict, inputs=[prompt], outputs=[prompt, chat])

if __name__ == '__main__':
    demo.launch()