modelsmafia committed
Commit bda7ad7 · 1 Parent(s): 6d5782e

change chat completion

Files changed (2)
  1. .gradio/cached_examples/11/log.csv +3 -0
  2. app.py +4 -5
.gradio/cached_examples/11/log.csv ADDED
@@ -0,0 +1,3 @@
+Chatbot,timestamp
+"[[""\u0a38\u0a24 \u0a38\u0a4d\u0a30\u0a40 \u0a05\u0a15\u0a3e\u0a32, \u0a24\u0a41\u0a38\u0a40\u0a02 \u0a15\u0a3f\u0a35\u0a47\u0a02 \u0a39\u0a4b?"", ""Error: InferenceClient.chat_completion() got an unexpected keyword argument 'max_new_tokens'\n\nThe model might not be properly configured for inference yet.""]]",2025-04-27 22:58:44.721847
+"[[""Tell me about Punjab in a few sentences."", ""Error: InferenceClient.chat_completion() got an unexpected keyword argument 'max_new_tokens'\n\nThe model might not be properly configured for inference yet.""]]",2025-04-27 22:58:44.723935
app.py CHANGED
@@ -1,6 +1,5 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-import requests
 
 # Use InferenceClient to interact with your model through the API
 client = InferenceClient(model="modelsmafia/punjabi_Gemma-2B")
@@ -14,14 +13,14 @@ def chat_with_model(message, history):
     messages.append({"role": "user", "content": message})
 
     try:
-        # Generate response using Inference API
-        response = client.chat_completion(
-            messages=messages,
+        # Generate response using Inference API with correct parameters
+        response = client.text_generation(
+            prompt=message,  # You might need to format this differently
             max_new_tokens=512,
             temperature=0.7,
             top_p=0.9
         )
-        return response.choices[0].message.content
+        return response
     except Exception as e:
         return f"Error: {str(e)}\n\nThe model might not be properly configured for inference yet."
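On the `prompt=message` line: `text_generation()` sends raw text, so an instruction-tuned Gemma checkpoint will generally expect its chat template to be applied first — which is what the inline "You might need to format this differently" comment alludes to. A minimal sketch of one way to do that, assuming the repo ships a tokenizer with a chat template; this is not part of the commit:

```python
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer

MODEL_ID = "modelsmafia/punjabi_Gemma-2B"
client = InferenceClient(model=MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

def chat_with_model(message, history):
    messages = [{"role": "user", "content": message}]
    # Render the conversation into the plain-text prompt format the model
    # was fine-tuned on, ending with the assistant-turn marker.
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # text_generation() returns the generated string directly.
    return client.text_generation(
        prompt,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.9,
    )
```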