Spaces:

johnpaulbin
/

googoo

Sleeping

App Files Files Community

johnpaulbin committed on Feb 23

Commit

7f36089

verified ·

1 Parent(s): 2c10ce7

Create app.py

Browse files

Files changed (1) hide show

app.py +47 -0

app.py ADDED Viewed

	@@ -0,0 +1,47 @@

+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+import gradio as gr
+# Download the model from Hugging Face
+model_name = "johnpaulbin/articulate-V1-Q8_0-GGUF"
+model_file = "articulate-V1-Q8_0.gguf"  # Replace with the actual GGUF file name from the repository
+model_path = hf_hub_download(repo_id=model_name, filename=model_file)
+# Initialize the Llama model with llama-cpp-python
+llm = Llama(
+    model_path=model_path,
+    n_ctx=1024,           # Context length (adjust as needed)
+    n_threads=2,         # Number of CPU threads
+    n_gpu_layers=0        # Run on CPU only (no GPU in free Spaces tier)
+)
+# Define the chat function for Gradio
+def chat(message, history):
+    # Build the message list with history and current user input
+    messages = []
+    for user_msg, assistant_msg in history:
+        messages.append({"role": "user", "content": user_msg})
+        messages.append({"role": "assistant", "content": assistant_msg})
+    messages.append({"role": "user", "content": message})
+    # Perform inference with greedy decoding
+    response = llm.create_chat_completion(
+        messages=messages,
+        max_tokens=100,    # Limit output length
+        top_k=1,          # Greedy decoding: select the top token
+        temperature=0.01  # Low temperature for determinism (top_k=1 is sufficient)
+    )
+    # Extract and return the generated text
+    generated_text = response['choices'][0]['message']['content']
+    return generated_text
+# Create the Gradio ChatInterface
+iface = gr.ChatInterface(
+    fn=chat,
+    title="Articulate V1 Chatbot",
+    description="Chat with the Articulate V1 model (Llama 3-based) using greedy decoding."
+)
+# Launch the app
+iface.launch()