alishafique committed (verified)
Commit 2f8d3e2 · 1 Parent(s): a64c325

Update app.py

Files changed (1):
  1. app.py +2 -43
app.py CHANGED
@@ -1,10 +1,3 @@
-# import torch
-# print(torch.cuda.is_available())        # Should return True
-# print(torch.cuda.get_device_name(0))    # Should return 'Tesla T4'
-# print(torch.cuda.get_device_capability(0))
-
-
-
 import llama_cpp
 from llama_cpp import Llama
 # import llama_cpp.llama_tokenizer
@@ -17,17 +10,6 @@ model_file = "model-Q8_0.gguf"
 model_path_file = hf_hub_download(model_name,
                                   filename=model_file,)
 
-# llama = llama_cpp.Llama.from_pretrained(
-#     repo_id="large-traversaal/Alif-1.0-8B-Instruct",
-#     filename="*model-Q6_K.gguf",
-#     tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
-#         "large-traversaal/Alif-1.0-8B-Instruct"
-#     ),
-#     verbose=False,
-# )
-
-
-# llama = Llama(model_path="./model-Q8_0.gguf", verbose=False)
 
 llama = Llama(
     model_path=model_path_file,
@@ -40,18 +22,6 @@ llama = Llama(
 
 chat_prompt = """You are Urdu Chatbot. Write approriate response for given instruction:{inp} Response:"""
 
-# prompt = "قابل تجدید توانائی کیا ہے؟"
-prompt = "شہر کراچی کے بارے میں بتاؤ"
-
-# prompt = chat_prompt.format(inp=prompt)
-
-# response = llama(prompt, max_tokens=256, stop=["Q:", "\n"], echo=False, stream=True)  # Enable streaming
-
-
-# # prompt = "قابل تجدید توانائی کیا ہے؟"
-# stop_tokens = ["\n\n", "<|end_of_text|>"]  # Stops after natural pauses or end-of-text token
-
-
 # Function to generate text with streaming output
 def chat_with_ai(prompt):
     query = chat_prompt.format(inp=prompt)
@@ -59,17 +29,6 @@ def chat_with_ai(prompt):
     #response = llama(prompt, max_tokens=1024, stop=stop_tokens, echo=False, stream=True)  # Enable streaming
     response = llama(query, max_tokens=256, stop=["Q:", "\n"], echo=False, stream=True)  # Enable streaming
 
-    # response = llama.create_chat_completion(
-    #     messages = [
-    #         {"role": "system", "content": "You are a Urdu Chatbot."},
-    #         {
-    #             "role": "user",
-    #             "content": prompt
-    #         }
-    #     ],
-    #     stream=True
-    # )
-
     text = ""
     for chunk in response:
         content = chunk["choices"][0]["text"]
@@ -83,8 +42,8 @@ demo = gr.Interface(
     fn=chat_with_ai,   # Streaming function
     inputs="text",     # User input
     outputs="text",    # Model response
-    title="💬 Streaming AI Chatbot",
-    description="Enter a prompt and get a streamed response from Llama.cpp (GGUF)."
+    title="💬 Streaming Alif-1.0-8B-Instruct Chatbot",
+    description="Enter a prompt and get a streamed response."
 )
 
 # Launch the Gradio app
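For reference, below is a minimal sketch of how app.py reads after this commit. The hunks above elide a few lines (the value assigned to model_name, the remaining Llama(...) constructor arguments, the end of the streaming loop, and the final launch call), so those pieces are filled in as labelled assumptions rather than copied from the committed file; the repo id is taken from the commented-out code this commit removes.

# Sketch of app.py after this commit; elided pieces are marked as assumptions.
import llama_cpp  # imported in the original file, kept for parity
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import gradio as gr

# Assumption: model_name is not visible in the hunks; this repo id comes from
# the commented-out Llama.from_pretrained() call removed by this commit.
model_name = "large-traversaal/Alif-1.0-8B-Instruct"
model_file = "model-Q8_0.gguf"

# Download the GGUF weights from the Hub and get the local cache path.
model_path_file = hf_hub_download(model_name, filename=model_file)

llama = Llama(
    model_path=model_path_file,
    # Assumption: the remaining constructor arguments are elided in the diff;
    # a context window is set here only so the sketch runs end to end.
    n_ctx=2048,
    verbose=False,
)

chat_prompt = """You are Urdu Chatbot. Write approriate response for given instruction:{inp} Response:"""

# Function to generate text with streaming output
def chat_with_ai(prompt):
    query = chat_prompt.format(inp=prompt)
    # stream=True makes llama-cpp-python return an iterator of partial completions.
    response = llama(query, max_tokens=256, stop=["Q:", "\n"], echo=False, stream=True)
    text = ""
    for chunk in response:
        content = chunk["choices"][0]["text"]
        text += content
        # Assumption: the tail of this loop is elided in the diff; yielding the
        # accumulated text is the usual way to stream partial output to Gradio.
        yield text

demo = gr.Interface(
    fn=chat_with_ai,   # Streaming function
    inputs="text",     # User input
    outputs="text",    # Model response
    title="💬 Streaming Alif-1.0-8B-Instruct Chatbot",
    description="Enter a prompt and get a streamed response."
)

# Launch the Gradio app (the actual launch call sits below the last hunk).
demo.launch()

Because chat_with_ai is a generator, gr.Interface streams each partial completion into the output textbox as it arrives instead of waiting for the full 256 tokens.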