johnpaulbin committed on
Commit
7f36089
·
verified ·
1 Parent(s): 2c10ce7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -0
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import hf_hub_download
2
+ from llama_cpp import Llama
3
+ import gradio as gr
4
+
5
# Hugging Face repository that hosts the weights, and the GGUF file inside it.
model_name = "johnpaulbin/articulate-V1-Q8_0-GGUF"
model_file = "articulate-V1-Q8_0.gguf"

# Fetch the GGUF file from the Hub; the returned value is handed to Llama
# below as the model path.
model_path = hf_hub_download(filename=model_file, repo_id=model_name)

# Load the model through llama-cpp-python.
llm = Llama(
    n_gpu_layers=0,  # CPU only (no GPU in free Spaces tier)
    n_threads=2,  # number of CPU threads
    n_ctx=1024,  # context length (adjust as needed)
    model_path=model_path,
)
17
+
18
+ # Define the chat function for Gradio
19
def chat(message, history):
    """Generate the assistant's reply to ``message`` given the prior turns.

    Args:
        message: The latest user input.
        history: Prior conversation as supplied by ``gr.ChatInterface``.
            Two layouts are accepted: a sequence of ``(user, assistant)``
            pairs, or a sequence of ``{"role": ..., "content": ...}`` dicts.
            ``None`` or an empty sequence means there are no prior turns.

    Returns:
        The generated text of the first completion choice.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Dict-style history: forward only role and content, dropping any
            # other per-message keys, and skip entries without usable content.
            role = turn.get("role")
            content = turn.get("content")
            if role and content is not None:
                messages.append({"role": role, "content": content})
            continue

        # Pair-style history: one (user, assistant) pair per turn. Either
        # side may be None, in which case there is nothing to forward.
        user_msg, assistant_msg = turn
        if user_msg is not None:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg is not None:
            messages.append({"role": "assistant", "content": assistant_msg})

    # The current user input always goes last.
    messages.append({"role": "user", "content": message})

    # Greedy decoding: top_k=1 keeps only the top token, so the low
    # temperature is just an extra nudge towards determinism.
    response = llm.create_chat_completion(
        messages=messages,
        max_tokens=100,  # limit output length
        top_k=1,
        temperature=0.01,
    )

    # Extract and return the generated text.
    return response["choices"][0]["message"]["content"]
38
+
39
# Build the Gradio chat UI around the chat function.
iface = gr.ChatInterface(
    description="Chat with the Articulate V1 model (Llama 3-based) using greedy decoding.",
    title="Articulate V1 Chatbot",
    fn=chat,
)

# Start serving the app.
iface.launch()