import subprocess
# from transformers import pipeline  # only needed by the unused predict_t() variant below
import ollama
import gradio
import os
import threading

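# Path to the Ollama binary; download it on first run and make it executable.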
OLLAMA = os.path.expanduser("~/ollama")

if not os.path.exists(OLLAMA):
    subprocess.run("curl -L https://ollama.com/download/ollama-linux-amd64 -o ~/ollama", shell=True)
    os.chmod(OLLAMA, 0o755)

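# Chat history as (user_message, assistant_reply) pairs, shared with the Gradio Chatbot.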
history = []

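# Run `ollama serve` in a background thread so the app can reach a local Ollama server.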
def ollama_service_thread():
    subprocess.run("~/ollama serve", shell=True)

OLLAMA_SERVICE_THREAD = threading.Thread(target=ollama_service_thread)
OLLAMA_SERVICE_THREAD.start()
      

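# Flatten the (user, assistant) history into the role/content message list expected by ollama.chat.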
def get_history_messages():
    messages = []
    for user, assist in history:
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": assist})
    return messages


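# Stream a reply to `prompt` from the tinydolphin model via Ollama, yielding the growing
# history so the chatbot updates as each chunk arrives (the empty string clears the textbox).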
def predict(prompt):
    response = ollama.chat(
        model="tinydolphin",
        messages=[
            *get_history_messages(),
            {"role": "user", "content": prompt}
        ],
        stream=True
    )
    history.append((prompt, ""))
    message = ""
    for chunk in response:
        message += chunk["message"]["content"]
        history[-1] = (prompt, message)
        yield "", history


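# Unused, non-streaming alternative to predict() that runs TinyDolphin through the
# transformers "conversational" pipeline; requires uncommenting the transformers import above.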
def predict_t(prompt):
    print("Predict:", prompt)
    print("Loading model")
    pipe = pipeline("conversational", model="cognitivecomputations/TinyDolphin-2.8-1.1b")
    print("Running pipeline")
    response = pipe(
        [
            *get_history_messages(),
            {"role": "user", "content": prompt}
        ],
    )
    history.append((prompt, response.messages[-1]["content"])) 
    print("Predict done")
    return "", history

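# Minimal Gradio UI: a chatbot above a prompt textbox and a send button; pressing Enter
# in the textbox or clicking the button both call predict().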
with gradio.Blocks(fill_height=True) as demo:
    chat = gradio.Chatbot(scale=1)
    with gradio.Row(variant="compact"):
        prompt = gradio.Textbox(show_label=False, scale=6, autofocus=True)
        button = gradio.Button(scale=1)

    for handler in [button.click, prompt.submit]:
        handler(predict, inputs=[prompt], outputs=[prompt, chat])


if __name__ == '__main__':
    demo.launch()