KG0101 committed on
Commit 24124c5 · verified · 1 Parent(s): 3de905c

Update app.py

Files changed (1)
app.py +14 -19
app.py CHANGED
@@ -1,24 +1,20 @@
 import spaces
 import torch
 import gradio as gr
-from transformers import pipeline, AutoModel, AutoTokenizer
+from transformers import pipeline
 from llama_cpp import Llama
-import os
 
 MODEL_NAME = "openai/whisper-large-v3-turbo"
-MODEL_PATH = "model.gguf"  # Path to the downloaded model
 BATCH_SIZE = 8
 FILE_LIMIT_MB = 1000
 
 device = 0 if torch.cuda.is_available() else "cpu"
 
-# Download and load model if not already present
-if not os.path.exists(MODEL_PATH):
-    from huggingface_hub import hf_hub_download
-    hf_hub_download(repo_id="MaziyarPanahi/Qwen2-7B-Instruct-GGUF", filename="model.gguf", local_dir="./")
-
-# Load the Llama model with specified context and threading
-llm = Llama(model_path=MODEL_PATH, n_ctx=8000, n_threads=2, chat_format="chatml")
+# Load the Llama model directly from Hugging Face
+llm = Llama.from_pretrained(
+    repo_id="MaziyarPanahi/Qwen2-7B-Instruct-GGUF",
+    filename="Qwen2-7B-Instruct.Q4_K_M.gguf"
+)
 
 # Initialize the transcription pipeline
 pipe = pipeline(
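Note on this hunk: Llama.from_pretrained fetches the GGUF straight from the Hub (it relies on huggingface_hub under the hood) and caches it locally, which is what the deleted hf_hub_download block did by hand. The new call also drops the old n_ctx=8000, n_threads=2, and chat_format="chatml" arguments, so the model falls back to llama-cpp-python defaults, including a much smaller default context window; omitting chat_format typically lets the library pick up the chat template embedded in the GGUF metadata. A minimal sketch, assuming those earlier settings are still wanted (from_pretrained forwards extra keyword arguments to the Llama constructor):

# Sketch only: keeping the previous context/threading settings with the new loader.
from llama_cpp import Llama

llm = Llama.from_pretrained(
    repo_id="MaziyarPanahi/Qwen2-7B-Instruct-GGUF",
    filename="Qwen2-7B-Instruct.Q4_K_M.gguf",
    n_ctx=8000,            # context window used before this commit
    n_threads=2,           # CPU threads used before this commit
    chat_format="chatml",  # explicit template, as before; omit to use GGUF metadata
)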
@@ -51,16 +47,15 @@ def transcribe(inputs, task):
 
 # Function to generate SOAP notes using Llama model
 def generate_soap(transcribed_text):
-    prompt = [{"role": "system", "content": sys_prompt}]
-    prompt.append({"role": "user", "content": f"{task_prompt}\n{transcribed_text}"})
+    # Format the conversation for the Llama model
+    prompt = [
+        {"role": "system", "content": sys_prompt},
+        {"role": "user", "content": f"{task_prompt}\n{transcribed_text}"}
+    ]
 
-    # Generate a response using the Llama model in streaming mode
-    stream_response = llm.create_chat_completion(messages=prompt, temperature=0.7, max_tokens=2048, stream=True)
-    response = ""
-    for chunk in stream_response:
-        if "content" in chunk['choices'][0]["delta"]:
-            response += chunk['choices'][0]["delta"]["content"]
-    return response
+    # Generate a response
+    response = llm.create_chat_completion(messages=prompt, temperature=0.7, max_tokens=2048)
+    return response["choices"][0]["message"]["content"]
 
 # Gradio Interfaces for different inputs
 demo = gr.Blocks(theme=gr.themes.Ocean())
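Note on this hunk: the rewritten generate_soap now returns the completion in one piece, and response["choices"][0]["message"]["content"] is the shape llama-cpp-python uses for non-streamed chat completions. If incremental output in the Gradio UI is wanted again, the streaming loop removed here could be kept as a generator instead — a sketch along those lines (generate_soap_stream is a hypothetical name, not part of this commit):

# Sketch only: a generator variant of the removed streaming code. Gradio
# components re-render on each yield, so partial notes appear as they stream.
def generate_soap_stream(transcribed_text):
    prompt = [
        {"role": "system", "content": sys_prompt},
        {"role": "user", "content": f"{task_prompt}\n{transcribed_text}"}
    ]
    response = ""
    for chunk in llm.create_chat_completion(
        messages=prompt, temperature=0.7, max_tokens=2048, stream=True
    ):
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            response += delta["content"]
            yield response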
 