Nitin00043 committed on
Commit
aebafcf
·
verified ·
1 Parent(s): e50f30c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -19
app.py CHANGED
@@ -10,36 +10,32 @@ model_name = "google/matcha-base"
10
  model = Pix2StructForConditionalGeneration.from_pretrained(model_name)
11
  processor = Pix2StructProcessor.from_pretrained(model_name)
12
 
13
- # Move model to GPU if available for faster inference
14
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
15
  model.to(device)
 
16
 
17
  def solve_math_problem(image):
18
- # Preprocess the image and include a clear prompt.
19
- # You can adjust the prompt to better match your task if needed.
20
  inputs = processor(images=image, text="Solve the math problem:", return_tensors="pt")
21
- # Ensure the tensors are on the same device as the model
22
  inputs = {key: value.to(device) for key, value in inputs.items()}
23
 
24
- # Generate the solution using beam search.
25
- # Adjust parameters for best performance:
26
- # - max_new_tokens: Allows longer responses.
27
- # - num_beams: Uses beam search to explore multiple hypotheses.
28
- # - early_stopping: Stops decoding once a complete answer is generated.
29
- # - temperature: Controls randomness (lower value = more deterministic).
30
- predictions = model.generate(
31
- **inputs,
32
- max_new_tokens=150,
33
- num_beams=5,
34
- early_stopping=True,
35
- temperature=0.5
36
- )
37
 
38
- # Decode the output to get a string answer, skipping any special tokens.
39
  solution = processor.decode(predictions[0], skip_special_tokens=True)
40
  return solution
41
 
42
- # Set up a Gradio interface
43
  demo = gr.Interface(
44
  fn=solve_math_problem,
45
  inputs=gr.Image(type="pil", label="Upload Handwritten Math Problem"),
@@ -51,3 +47,4 @@ demo = gr.Interface(
51
 
52
  if __name__ == "__main__":
53
  demo.launch()
 
 
10
# Load the MatCha checkpoint (model_name is defined above) and its paired
# processor; both come from the same pretrained identifier so their
# tokenization/feature extraction stay consistent with the model weights.
model = Pix2StructForConditionalGeneration.from_pretrained(model_name)
processor = Pix2StructProcessor.from_pretrained(model_name)

# Move model to GPU if available and set to evaluation mode
# (eval() disables dropout/batch-norm updates for deterministic inference).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()
17
 
18
def solve_math_problem(image):
    """Run the MatCha model on an image of a handwritten math problem.

    Args:
        image: a PIL image (supplied by the Gradio ``gr.Image`` input).

    Returns:
        The model's decoded answer as a plain string.
    """
    # Preprocess the image and pair it with an instruction prompt.
    inputs = processor(images=image, text="Solve the math problem:", return_tensors="pt")
    # Move all tensors to the same device as the model.
    inputs = {key: value.to(device) for key, value in inputs.items()}

    # Generate with beam search; no gradients are needed at inference time,
    # so torch.no_grad() saves memory and speeds things up.
    # FIX: dropped `temperature=0.5` — temperature only applies when
    # do_sample=True; under deterministic beam search transformers ignores
    # it and emits a warning about an unused generation flag.
    with torch.no_grad():
        predictions = model.generate(
            **inputs,
            max_new_tokens=150,  # increase if longer answers are needed
            num_beams=5,         # beam search for more stable outputs
            early_stopping=True,
        )

    # Decode the generated tokens to a string, skipping special tokens.
    solution = processor.decode(predictions[0], skip_special_tokens=True)
    return solution
37
 
38
+ # Set up the Gradio interface
39
  demo = gr.Interface(
40
  fn=solve_math_problem,
41
  inputs=gr.Image(type="pil", label="Upload Handwritten Math Problem"),
 
47
 
48
# Launch the Gradio app only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()
50
+