ayouuubBn committed
Commit 2779d84 · verified · 1 Parent(s): f488ab5

Update app.py

Files changed (1)
  1. app.py +27 -42
app.py CHANGED
@@ -1,34 +1,27 @@
-import os
-import time
 import gradio as gr
 from huggingface_hub import InferenceClient
 
-# Retrieve API token from Hugging Face Secrets
-HF_TOKEN = os.getenv("HF_TOKEN")
+client = InferenceClient("google/gemma-1.1-2b-it")
+client = InferenceClient("mistralai/Mistral-Nemo-Instruct-2407")
 
-# Ensure the token is available
-if not HF_TOKEN:
-    raise ValueError("Missing Hugging Face API Token! Set 'HF_TOKEN' in Hugging Face Secrets.")
+def models(Query):
+
+    messages = []
+
+    messages.append({"role": "user", "content": f"[SYSTEM] You are ASSISTANT who answer question asked by user in short and concise manner. [USER] {Query}"})
 
-# Initialize clients with authentication
-client_gemma = InferenceClient("google/gemma-1.1-2b-it", token=HF_TOKEN)
-client_mistral = InferenceClient("mistralai/Mistral-Nemo-Instruct-2407", token=HF_TOKEN)
+    Response = ""
 
-# Function for normal fast responses
-def models(query):
-    messages = [{"role": "user", "content": f"[SYSTEM] You are an AI assistant. Answer concisely. [USER] {query}"}]
-
-    response = ""
-    try:
-        for message in client_gemma.chat_completion(messages, max_tokens=1024, stream=True):
-            token = message.choices[0].delta.content
-            response += token
-            yield response
-            time.sleep(0.3) # Prevent rate limiting
-    except Exception as e:
-        yield f"Error: {str(e)}"
+    for message in client.chat_completion(
+        messages,
+        max_tokens=2048,
+        stream=True
+    ):
+        token = message.choices[0].delta.content
+
+        Response += token
+        yield Response
 
-# Function for detailed critical thinking responses
 def nemo(query):
     budget = 3
     message = f"""[INST] [SYSTEM] You are a helpful assistant in normal conversation.
@@ -61,30 +54,22 @@ Example format:
 <reflection> [Evaluation of the solution] </reflection>
 <reward> [Float between 0.0 and 1.0] </reward> [/INST] [INST] [QUERY] {query} [/INST] [ASSISTANT] """
 
+    stream = client.text_generation(message, max_new_tokens=4096, stream=True, details=True, return_full_text=False)
     output = ""
-    try:
-        stream = client_mistral.text_generation(message, max_new_tokens=2048, stream=True, details=True, return_full_text=False)
-        for response in stream:
-            output += response.token.text
-            yield output
-            time.sleep(0.3) # Prevent rate limiting
-    except Exception as e:
-        yield f"Error: {str(e)}"
 
-# Description for Gradio UI
-description = "# Chat GO\n### Enter your query and get lightning-fast responses"
+    for response in stream:
+        output += response.token.text
+    return output
 
-# Gradio Interfaces
-with gr.Blocks() as demo1:
-    gr.Interface(fn=models, inputs=["text"], outputs="text", description=description)
+description="# Chat GO\n### Enter your query and Press enter and get lightning fast response"
 
+with gr.Blocks() as demo1:
+    gr.Interface(description=description,fn=models, inputs=["text"], outputs="text")
 with gr.Blocks() as demo2:
-    gr.Interface(fn=nemo, inputs=["text"], outputs="text", description="Critical Thinking Mode", api_name="critical_thinker", concurrency_limit=5)
+    gr.Interface(description="Very low but critical thinker",fn=nemo, inputs=["text"], outputs="text", api_name="critical_thinker", concurrency_limit=10)
 
-# Create a tabbed interface
 with gr.Blocks() as demo:
-    gr.TabbedInterface([demo1, demo2], ["Fast", "Critical"])
+    gr.TabbedInterface([demo1, demo2] , ["Fast", "Critical"])
 
-# Launch the application
-demo.queue(max_size=100000) # Adjust max queue size
+demo.queue(max_size=300000)
 demo.launch()
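
Note that in the new version both InferenceClient(...) calls bind the same name client, so once the second assignment runs, every call in models() and nemo() goes to the Mistral-Nemo endpoint and the Gemma model is never used. A minimal sketch of how the two endpoints could stay separate (reusing the naming of the previous revision; not part of this commit):

from huggingface_hub import InferenceClient

# Two distinct clients, one per model, so each tab talks to its own endpoint.
client_gemma = InferenceClient("google/gemma-1.1-2b-it")
client_mistral = InferenceClient("mistralai/Mistral-Nemo-Instruct-2407")

def models(query):
    # Stream a short, concise answer from the Gemma chat endpoint.
    messages = [{"role": "user", "content": f"[SYSTEM] You are an AI assistant. Answer concisely. [USER] {query}"}]
    response = ""
    for chunk in client_gemma.chat_completion(messages, max_tokens=2048, stream=True):
        response += chunk.choices[0].delta.content or ""  # delta content may be empty on some chunks
        yield response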
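
Because the Critical tab's gr.Interface sets api_name="critical_thinker", the endpoint can in principle also be reached programmatically. A hedged sketch using gradio_client: the Space ID below is a placeholder (not taken from this commit), and the exact endpoint path depends on how Gradio registers api_name inside the TabbedInterface.

from gradio_client import Client

# Placeholder Space ID for illustration only; substitute the real one.
space = Client("ayouuubBn/chat-go")
# Call the slower "critical thinking" endpoint exposed by the nemo interface.
result = space.predict("Explain the Monty Hall problem step by step.", api_name="/critical_thinker")
print(result)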