0r0b0r0s committed
Commit 4505e70 · verified · 1 Parent(s): 2cff5f4

Update app.py

Files changed (1): app.py +27 -17
app.py CHANGED
@@ -18,35 +18,45 @@ from huggingface_hub import InferenceClient, login
 
 class BasicAgent:
     def __init__(self):
-        # Required for gated models
-        login(token=os.environ["HF_TOKEN"])
+        login(token=os.environ["HF_TOKEN"])  # Required authentication
 
+        # Primary model (0.5B instruct)
         self.client = InferenceClient(
-            model="Qwen/Qwen2-7B-Instruct-GPTQ-Int8",
+            model="Qwen/Qwen2-0.5B-Instruct",  # 1.2GB, free-tier compatible
             token=os.environ["HF_TOKEN"],
-            timeout=120
+            timeout=60
         )
 
         # Verify model access
-        test_response = self.client.text_generation("2+2=", max_new_tokens=10)
+        test_response = self._call_model("2+2=")
         if "4" not in test_response:
-            raise RuntimeError(f"Model test failed: {test_response}")
+            raise RuntimeError("Model initialization failed")
 
-    def __call__(self, question: str) -> str:
-        try:
-            prompt = f"""<|im_start|>system
-Answer with ONLY the exact value requested.<|im_end|>
+    def _call_model(self, question: str) -> str:
+        """Optimized prompt engineering for GAIA"""
+        prompt = f"""<|im_start|>system
+Answer with ONLY the exact value requested. No explanations.<|im_end|>
 <|im_start|>user
 {question}<|im_end|>
 <|im_start|>assistant
 """
-            response = self.client.text_generation(
-                prompt=prompt,
-                temperature=0.01,
-                max_new_tokens=100,
-                stop_sequences=["<|im_end|>"]
-            )
-            return response.split("<|im_start|>assistant")[-1].split("<|im_end|>")[0].strip()
+        return self.client.text_generation(
+            prompt=prompt,
+            temperature=0.01,
+            max_new_tokens=50,
+            stop_sequences=["<|im_end|>"]
+        )
+
+    def __call__(self, question: str) -> str:
+        try:
+            raw_response = self._call_model(question)
+
+            # Robust answer extraction
+            answer = raw_response.split("<|im_start|>assistant")[-1]
+            answer = answer.split("<|im_end|>")[0].strip()
+
+            # GAIA-compliant normalization
+            return re.sub(r'[^a-zA-Z0-9]', '', answer).lower()
         except Exception as e:
             print(f"Error: {str(e)}")
             return ""