modelsmafia committed on
Commit
213e222
·
1 Parent(s): bda7ad7

api inference

Browse files
Files changed (1) hide show
  1. app.py +32 -15
app.py CHANGED
@@ -1,26 +1,43 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
 
 
3
 
4
# Hosted-inference handle for the fine-tuned Punjabi Gemma model; all
# generation requests go through the Hugging Face Inference API.
client = InferenceClient(model="modelsmafia/punjabi_Gemma-2B")
 
7
def chat_with_model(message, history):
    """Generate a reply from the hosted model for a gradio chat UI.

    Parameters:
        message: the latest user message (str).
        history: prior (user, assistant) turn pairs as gradio supplies them.

    Returns:
        The model's generated text, or a human-readable error string if the
        Inference API call fails.
    """
    # BUG FIX: the original built a `messages` list from `history` and then
    # discarded it, sending only the bare `message` — the model never saw
    # any conversation context. Flatten the turns into the prompt instead.
    parts = []
    for user_msg, bot_msg in history:
        parts.append(f"User: {user_msg}\nAssistant: {bot_msg}\n")
    parts.append(f"User: {message}\nAssistant:")
    prompt = "".join(parts)

    try:
        # Generate response using the Inference API.
        response = client.text_generation(
            prompt=prompt,
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.9
        )
        return response
    except Exception as e:
        return f"Error: {str(e)}\n\nThe model might not be properly configured for inference yet."
26
 
 
1
  import gradio as gr
2
+ import requests
3
+ import json
4
+ import os
5
 
# Hugging Face API token, read from the environment. Set HF_API_TOKEN in
# the Space settings; an empty string is used when it is absent, which will
# produce unauthenticated (likely rejected) API calls.
API_TOKEN = os.getenv("HF_API_TOKEN", "")
9
def chat_with_model(message, history):
    """Query the hosted Gemma model through the HF serverless Inference API.

    Parameters:
        message: the latest user message (str).
        history: prior (user, assistant) turn pairs as gradio supplies them.

    Returns:
        The model's reply with the echoed prompt stripped, or a
        human-readable error string when the request fails.
    """
    # Rebuild the whole conversation in Gemma's chat template, ending with
    # an open "model" turn for the API to complete.
    full_prompt = ""
    for user_msg, bot_msg in history:
        full_prompt += f"<start_of_turn>user\n{user_msg}\n<end_of_turn>\n"
        full_prompt += f"<start_of_turn>model\n{bot_msg}\n<end_of_turn>\n"
    full_prompt += f"<start_of_turn>user\n{message}\n<end_of_turn>\n<start_of_turn>model\n"

    try:
        headers = {
            "Authorization": f"Bearer {API_TOKEN}",
            "Content-Type": "application/json"
        }

        payload = {
            "inputs": full_prompt,
            "parameters": {
                "max_new_tokens": 512,
                "temperature": 0.7,
                "top_p": 0.9,
                "do_sample": True
            }
        }

        API_URL = "https://api-inference.huggingface.co/models/modelsmafia/punjabi_Gemma-2B"
        # FIX: a timeout keeps the chat UI from hanging forever if the
        # inference endpoint stalls (requests.post blocks indefinitely
        # without one).
        response = requests.post(API_URL, headers=headers, json=payload, timeout=120)

        if response.status_code == 200:
            generated = response.json()[0]["generated_text"]
            # FIX: the API echoes the prompt at the start of generated_text.
            # Strip only that leading copy — the original `.replace(full_prompt,
            # "")` would also delete any later occurrence of the prompt text
            # inside the reply itself.
            if generated.startswith(full_prompt):
                generated = generated[len(full_prompt):]
            return generated
        else:
            return f"Error: Status code {response.status_code}\n{response.text}\n\nYou need to configure your model for inference on Hugging Face."

    except Exception as e:
        return f"Error: {str(e)}\n\nThe model might not be properly configured for inference yet."
43