Spaces:

lemonteaa
/

bitnet-with-ik_llama

Sleeping

App Files Files Community

lemonteaa committed 15 days ago

Commit

a7e66dd

verified ·

1 Parent(s): 7a1b440

Update chat_demo.py

Browse files

Files changed (1) hide show

chat_demo.py +35 -1

chat_demo.py CHANGED Viewed

@@ -4,10 +4,38 @@ import uuid
 import json
 import os
 import tempfile
 BASE_URL = "http://localhost:8080/v1"
 MODEL_NAME = "bn"
 cli = OpenAI(api_key="sk-nokey", base_url=BASE_URL)
 def openai_call(message, history, system_prompt, max_new_tokens):
@@ -81,5 +109,11 @@ with gr.Blocks() as demo:
         .success(fn=clean_file, inputs=[orig_path])
     download_file.download(on_download, None, None)
-demo.queue(max_size=10, api_open=True).launch(server_name='0.0.0.0')

 import json
 import os
 import tempfile
+import subprocess
+import threading
 BASE_URL = "http://localhost:8080/v1"
 MODEL_NAME = "bn"
+def read_output(process):
+    """Relay a subprocess's captured output to this process's stdout.
+
+    Reads ``process.stdout`` line by line until end-of-file, printing each
+    line with trailing whitespace removed, then closes the stream. The call
+    blocks until the stream is exhausted.
+
+    Args:
+        process: Object whose ``stdout`` attribute is a text-mode stream,
+            e.g. a ``subprocess.Popen`` created with ``stdout=PIPE`` and
+            ``text=True``.
+    """
+    # The ``with`` block closes the pipe even if printing fails part-way
+    # through; a trailing ``close()`` call would be skipped on an exception.
+    with process.stdout as stream:
+        for line in stream:
+            print(line.rstrip())
+def start_server(command):
+    """Launch *command* as a child process and echo its output in the background.
+
+    The child's stderr is merged into its stdout, the combined stream is
+    decoded as text, and a daemon thread running ``read_output`` prints it.
+
+    Args:
+        command: Program and arguments, passed unchanged to
+            ``subprocess.Popen``.
+
+    Returns:
+        The ``subprocess.Popen`` object for the started process.
+    """
+    popen_options = {
+        "stdout": subprocess.PIPE,
+        "stderr": subprocess.STDOUT,  # merge stderr into the same pipe
+        "text": True,  # yield str lines instead of bytes
+    }
+    server = subprocess.Popen(command, **popen_options)
+
+    # Daemon thread: the output relay must not keep the interpreter alive
+    # once the main program exits.
+    relay = threading.Thread(target=read_output, args=(server,), daemon=True)
+    relay.start()
+    return server
+# Start the llama-server binary with the model-out.gguf model file and the
+# "vicuna" chat template as soon as this module runs. The returned process
+# handle is kept at module level so it can be terminated when the app exits.
+# NOTE(review): presumably this is the server reachable at BASE_URL; confirm
+# the server's listening port matches.
+server_process = start_server(["./ik_llama.cpp/build/bin/llama-server", "-m" ,"./ik_llama.cpp/build/model-out.gguf", "--chat-template", "vicuna"])
 cli = OpenAI(api_key="sk-nokey", base_url=BASE_URL)
 def openai_call(message, history, system_prompt, max_new_tokens):
         .success(fn=clean_file, inputs=[orig_path])
     download_file.download(on_download, None, None)
+try:
+    # Serve the Gradio app; the ``finally`` clause below runs once this call
+    # returns or raises (including on KeyboardInterrupt).
+    demo.queue(max_size=10, api_open=True).launch(server_name='0.0.0.0')
+finally:
+    # Stop the server
+    server_process.terminate()
+    try:
+        # Bound the wait: a server that ignores the terminate request would
+        # otherwise block shutdown forever.
+        server_process.wait(timeout=10)
+    except subprocess.TimeoutExpired:
+        # Escalate to a forced kill, then reap the process.
+        server_process.kill()
+        server_process.wait()
+    print("Server stopped.")