Goodnight7 committed on
Commit
8942d5c
·
verified ·
1 Parent(s): f528f15

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -0
app.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ from PIL import Image
4
+ import io
5
+ import base64
6
+ import requests
7
+ import json
8
+ from pathlib import Path
9
+
10
# Create the local assets directory (and any missing parents) at startup;
# nothing happens when it is already present.
os.makedirs("./assets", exist_ok=True)
12
+
13
def call_groq_api(image_base64, model, prompt, timeout=60):
    """Send one image plus a text prompt to Groq's chat-completions endpoint.

    The request is issued with ``requests`` directly instead of going through
    the ``groq`` client package.

    Args:
        image_base64: Base64-encoded PNG data; it is embedded in a
            ``data:image/png;base64,...`` URL inside the message.
        model: Model identifier placed in the request's ``model`` field.
        prompt: Instruction text sent alongside the image.
        timeout: Value passed to ``requests.post`` as ``timeout`` (seconds).
            Without it a stalled connection would block the caller forever.

    Returns:
        A ``(content, error)`` tuple. On success ``content`` is the message
        content of the first choice and ``error`` is ``None``. On failure
        ``content`` is ``None`` and ``error`` is a human-readable message.
    """
    # Strip so that a key consisting only of whitespace (or carrying a stray
    # trailing newline) is not sent as a malformed Authorization header.
    api_key = os.environ.get("GROQ_API_KEY", "").strip()
    if not api_key:
        return None, "Error: GROQ_API_KEY environment variable is not set."

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    # A single user message carrying both the instruction and the image.
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{image_base64}"},
            },
        ],
    }
    payload = {
        "model": model,
        "messages": [user_message],
        "temperature": 0.1,
        "max_tokens": 1000,
    }

    try:
        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=timeout,
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"], None
    except (
        requests.RequestException,  # connection, timeout, HTTP status errors
        KeyError,  # response body lacks an expected field
        IndexError,  # "choices" is empty
        TypeError,  # response body is not shaped like a mapping/list
        ValueError,  # response body is not valid JSON
    ) as exc:
        return None, f"Error calling Groq API: {exc}"
58
+
59
# Streamlit page setup: tab title and icon, wide layout, sidebar shown open.
_PAGE_SETTINGS = {
    "page_title": "Llama-3-2-90b-vision-preview",
    "page_icon": "👁️",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
66
+
67
# Header row: logo and title on the left, "Clear" button on the top right.
_TITLE_TEXT = "Llama-3-2-90b-vision-preview"
_LOGO_PATH = Path("img/llama.png")

col1, col2 = st.columns([6, 1])
with col1:
    try:
        # read_bytes() opens and closes the file itself, so no handle leaks.
        _logo_b64 = base64.b64encode(_LOGO_PATH.read_bytes()).decode()
    except OSError:
        # A missing or unreadable logo should not stop the app from loading;
        # fall back to a text-only title.
        _logo_b64 = None

    if _logo_b64 is None:
        st.markdown(_TITLE_TEXT)
    else:
        st.markdown(
            f'\n<img src="data:image/png;base64,{_logo_b64}" width="50" '
            f'style="vertical-align: -12px;"> {_TITLE_TEXT}\n',
            unsafe_allow_html=True,
        )
with col2:
    if st.button("Clear 🗑️"):
        # Forget the stored extraction result, then rerun the script so the
        # page is redrawn without it.
        if "ocr_result" in st.session_state:
            del st.session_state["ocr_result"]
        st.rerun()

st.markdown("Extract structured text from images using Vision Models!", unsafe_allow_html=True)
st.markdown("---")
81
+
82
# Sidebar: image upload, model selection, and the extraction trigger.

# Maps the label shown in the selectbox to the model identifier sent to the
# API. The labels alone are not valid identifiers.
# NOTE(review): confirm these IDs against Groq's current model list; preview
# models may since have been retired.
_GROQ_VISION_MODELS = {
    "Llama-3-2-11b-vision-preview": "llama-3.2-11b-vision-preview",
    "Llama-3-2-90b-vision-preview": "llama-3.2-90b-vision-preview",
}

# Image modes that can be written as PNG unchanged. Anything else (for
# example a CMYK JPEG) is converted to RGB before encoding.
_PNG_SAFE_MODES = ("1", "L", "LA", "P", "RGB", "RGBA", "I", "I;16")

with st.sidebar:
    st.header("Upload Image")
    uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])

    # Model selection
    st.subheader("Model Settings")
    _model_label = st.selectbox(
        "Select Vision Model",
        list(_GROQ_VISION_MODELS),
        index=0,
    )
    model = _GROQ_VISION_MODELS[_model_label]

    if uploaded_file is not None:
        try:
            image = Image.open(uploaded_file)
        except OSError as e:
            # Covers files Pillow cannot identify or read; report the problem
            # instead of letting the traceback take over the page.
            image = None
            st.error(f"Error processing image: {str(e)}")

        if image is not None:
            # Display the uploaded image
            st.image(image, caption="Uploaded Image")

            if st.button("Extract Text 🔍", type="primary"):
                with st.spinner("Processing image..."):
                    try:
                        # Re-encode as PNG because call_groq_api labels the
                        # payload as image/png.
                        png_source = image
                        if png_source.mode not in _PNG_SAFE_MODES:
                            png_source = png_source.convert("RGB")
                        buffered = io.BytesIO()
                        png_source.save(buffered, format="PNG")
                        img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")

                        # Prepare the prompt
                        prompt = (
                            "Analyze the text in the provided image. Extract all readable content\n"
                            "and present it in a structured Markdown format that is clear, concise,\n"
                            "and well-organized. Ensure proper formatting (e.g., headings, lists, or\n"
                            "code blocks) as necessary to represent the content effectively."
                        )

                        # Call the API
                        result, error = call_groq_api(img_str, model, prompt)

                        if error:
                            st.error(error)
                        else:
                            # Kept in session state so the main area can
                            # still show it on later reruns.
                            st.session_state["ocr_result"] = result
                    except Exception as e:
                        # UI boundary: show any failure to the user rather
                        # than crashing the script run.
                        st.error(f"Error processing image: {str(e)}")
123
+
124
# Main area: show the stored extraction result, or a hint when there is none.
if "ocr_result" not in st.session_state:
    st.info("Upload an image and click 'Extract Text' to see the results here.")
else:
    st.markdown(st.session_state["ocr_result"])

# Footer: a divider followed by the attribution line.
for footer_line in ("---", "Made using Vision Models via Groq API"):
    st.markdown(footer_line)