0r0b0r0s committed
Commit 4505e70 · verified · 1 Parent(s): 2cff5f4

Update app.py

Files changed (1): app.py +27 -17
app.py CHANGED
@@ -18,35 +18,45 @@ from huggingface_hub import InferenceClient, login
 
 class BasicAgent:
     def __init__(self):
-        # Required for gated models
-        login(token=os.environ["HF_TOKEN"])
+        login(token=os.environ["HF_TOKEN"])  # Required authentication
 
+        # Primary model (0.5B instruct)
         self.client = InferenceClient(
-            model="Qwen/Qwen2-7B-Instruct-GPTQ-Int8",
+            model="Qwen/Qwen2-0.5B-Instruct",  # 1.2GB, free-tier compatible
             token=os.environ["HF_TOKEN"],
-            timeout=120
+            timeout=60
         )
 
         # Verify model access
-        test_response = self.client.text_generation("2+2=", max_new_tokens=10)
+        test_response = self._call_model("2+2=")
         if "4" not in test_response:
-            raise RuntimeError(f"Model test failed: {test_response}")
+            raise RuntimeError("Model initialization failed")
 
-    def __call__(self, question: str) -> str:
-        try:
-            prompt = f"""<|im_start|>system
-Answer with ONLY the exact value requested.<|im_end|>
+    def _call_model(self, question: str) -> str:
+        """Optimized prompt engineering for GAIA"""
+        prompt = f"""<|im_start|>system
+Answer with ONLY the exact value requested. No explanations.<|im_end|>
 <|im_start|>user
 {question}<|im_end|>
 <|im_start|>assistant
 """
-            response = self.client.text_generation(
-                prompt=prompt,
-                temperature=0.01,
-                max_new_tokens=100,
-                stop_sequences=["<|im_end|>"]
-            )
-            return response.split("<|im_start|>assistant")[-1].split("<|im_end|>")[0].strip()
+        return self.client.text_generation(
+            prompt=prompt,
+            temperature=0.01,
+            max_new_tokens=50,
+            stop_sequences=["<|im_end|>"]
+        )
+
+    def __call__(self, question: str) -> str:
+        try:
+            raw_response = self._call_model(question)
+
+            # Robust answer extraction
+            answer = raw_response.split("<|im_start|>assistant")[-1]
+            answer = answer.split("<|im_end|>")[0].strip()
+
+            # GAIA-compliant normalization
+            return re.sub(r'[^a-zA-Z0-9]', '', answer).lower()
         except Exception as e:
             print(f"Error: {str(e)}")
             return ""