alishafique committed (verified)
Commit 2f8d3e2 · 1 Parent(s): a64c325

Update app.py

Files changed (1):
  1. app.py +2 -43
app.py CHANGED
@@ -1,10 +1,3 @@
-# import torch
-# print(torch.cuda.is_available())        # Should return True
-# print(torch.cuda.get_device_name(0))    # Should return 'Tesla T4'
-# print(torch.cuda.get_device_capability(0))
-
-
-
 import llama_cpp
 from llama_cpp import Llama
 # import llama_cpp.llama_tokenizer
@@ -17,17 +10,6 @@ model_file = "model-Q8_0.gguf"
 model_path_file = hf_hub_download(model_name,
                                   filename=model_file,)
 
-# llama = llama_cpp.Llama.from_pretrained(
-#     repo_id="large-traversaal/Alif-1.0-8B-Instruct",
-#     filename="*model-Q6_K.gguf",
-#     tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
-#         "large-traversaal/Alif-1.0-8B-Instruct"
-#     ),
-#     verbose=False,
-# )
-
-
-# llama = Llama(model_path="./model-Q8_0.gguf", verbose=False)
 
 llama = Llama(
     model_path=model_path_file,
@@ -40,18 +22,6 @@ llama = Llama(
 
 chat_prompt = """You are Urdu Chatbot. Write approriate response for given instruction:{inp} Response:"""
 
-# prompt = "قابل تجدید توانائی کیا ہے؟"
-prompt = "شہر کراچی کے بارے میں بتاؤ"
-
-# prompt = chat_prompt.format(inp=prompt)
-
-# response = llama(prompt, max_tokens=256, stop=["Q:", "\n"], echo=False, stream=True)  # Enable streaming
-
-
-# # prompt = "قابل تجدید توانائی کیا ہے؟"
-# stop_tokens = ["\n\n", "<|end_of_text|>"]  # Stops after natural pauses or end-of-text token
-
-
 # Function to generate text with streaming output
 def chat_with_ai(prompt):
     query = chat_prompt.format(inp=prompt)
@@ -59,17 +29,6 @@ def chat_with_ai(prompt):
     #response = llama(prompt, max_tokens=1024, stop=stop_tokens, echo=False, stream=True)  # Enable streaming
     response = llama(query, max_tokens=256, stop=["Q:", "\n"], echo=False, stream=True)  # Enable streaming
 
-    # response = llama.create_chat_completion(
-    #     messages = [
-    #         {"role": "system", "content": "You are a Urdu Chatbot."},
-    #         {
-    #             "role": "user",
-    #             "content": prompt
-    #         }
-    #     ],
-    #     stream=True
-    # )
-
     text = ""
     for chunk in response:
         content = chunk["choices"][0]["text"]
@@ -83,8 +42,8 @@ demo = gr.Interface(
     fn=chat_with_ai,   # Streaming function
     inputs="text",     # User input
     outputs="text",    # Model response
-    title="💬 Streaming AI Chatbot",
-    description="Enter a prompt and get a streamed response from Llama.cpp (GGUF)."
+    title="💬 Streaming Alif-1.0-8B-Instruct Chatbot",
+    description="Enter a prompt and get a streamed response."
 )
 
 # Launch the Gradio app
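For reference, below is a minimal sketch of how app.py reads after this commit. The hunks above elide a few lines (the value assigned to model_name, the remaining Llama(...) constructor arguments, the end of the streaming loop, and the final launch call), so those pieces are filled in as labelled assumptions rather than copied from the committed file; the repo id is taken from the commented-out code this commit removes.

# Sketch of app.py after this commit; elided pieces are marked as assumptions.
import llama_cpp  # imported in the original file, kept for parity
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import gradio as gr

# Assumption: model_name is not visible in the hunks; this repo id comes from
# the commented-out Llama.from_pretrained() call removed by this commit.
model_name = "large-traversaal/Alif-1.0-8B-Instruct"
model_file = "model-Q8_0.gguf"

# Download the GGUF weights from the Hub and get the local cache path.
model_path_file = hf_hub_download(model_name, filename=model_file)

llama = Llama(
    model_path=model_path_file,
    # Assumption: the remaining constructor arguments are elided in the diff;
    # a context window is set here only so the sketch runs end to end.
    n_ctx=2048,
    verbose=False,
)

chat_prompt = """You are Urdu Chatbot. Write approriate response for given instruction:{inp} Response:"""

# Function to generate text with streaming output
def chat_with_ai(prompt):
    query = chat_prompt.format(inp=prompt)
    # stream=True makes llama-cpp-python return an iterator of partial completions.
    response = llama(query, max_tokens=256, stop=["Q:", "\n"], echo=False, stream=True)
    text = ""
    for chunk in response:
        content = chunk["choices"][0]["text"]
        text += content
        # Assumption: the tail of this loop is elided in the diff; yielding the
        # accumulated text is the usual way to stream partial output to Gradio.
        yield text

demo = gr.Interface(
    fn=chat_with_ai,   # Streaming function
    inputs="text",     # User input
    outputs="text",    # Model response
    title="💬 Streaming Alif-1.0-8B-Instruct Chatbot",
    description="Enter a prompt and get a streamed response."
)

# Launch the Gradio app (the actual launch call sits below the last hunk).
demo.launch()

Because chat_with_ai is a generator, gr.Interface streams each partial completion into the output textbox as it arrives instead of waiting for the full 256 tokens.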