# llama3.2_vision / app.py
import streamlit as st
import os
from PIL import Image
import io
import base64
import requests
import json
from pathlib import Path

# Ensure assets directory exists
Path("./assets").mkdir(parents=True, exist_ok=True)


# Function to call Groq API directly (avoiding the groq package)
def call_groq_api(image_base64, model, prompt):
    api_key = os.environ.get("GROQ_API_KEY", "")
    if not api_key:
        return None, "Error: GROQ_API_KEY environment variable is not set."

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{image_base64}"
                        }
                    }
                ]
            }
        ],
        "temperature": 0.1,
        "max_tokens": 1000
    }

    try:
        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers=headers,
            json=payload
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"], None
    except Exception as e:
        return None, f"Error calling Groq API: {str(e)}"

# Page configuration
st.set_page_config(
    page_title="Llama-3-2-90b-vision-preview",
    page_icon="👁️",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Add clear button to top right
col1, col2 = st.columns([6, 1])
with col1:
    st.markdown("""
    <img src="data:image/png;base64,{}" width="50" style="vertical-align: -12px;"> Llama-3-2-90b-vision-preview
    """.format(base64.b64encode(open("img/llama.png", "rb").read()).decode()), unsafe_allow_html=True)
with col2:
    if st.button("Clear 🗑️"):
        if "ocr_result" in st.session_state:
            del st.session_state["ocr_result"]
        st.rerun()

st.markdown("Extract structured text from images using Vision Models!", unsafe_allow_html=True)
st.markdown("---")
# Move upload controls to sidebar
with st.sidebar:
    st.header("Upload Image")
    uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])

    # Model selection
    st.subheader("Model Settings")
    model = st.selectbox(
        "Select Vision Model",
        ["llama-3.2-11b-vision-preview", "llama-3.2-90b-vision-preview"],
        index=0
    )
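
    # The selected ID is passed verbatim as the "model" field in the Groq request payload.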
    if uploaded_file is not None:
        # Display the uploaded image
        image = Image.open(uploaded_file)
        st.image(image, caption="Uploaded Image")

        if st.button("Extract Text 🔍", type="primary"):
            with st.spinner("Processing image..."):
                try:
                    # Convert image for API
                    buffered = io.BytesIO()
                    image.save(buffered, format="PNG")
                    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")

                    # Prepare the prompt
                    prompt = """Analyze the text in the provided image. Extract all readable content
                    and present it in a structured Markdown format that is clear, concise,
                    and well-organized. Ensure proper formatting (e.g., headings, lists, or
                    code blocks) as necessary to represent the content effectively."""

                    # Call the API
                    result, error = call_groq_api(img_str, model, prompt)
                    if error:
                        st.error(error)
                    else:
                        st.session_state["ocr_result"] = result
                except Exception as e:
                    st.error(f"Error processing image: {str(e)}")

# Main content area for results
if "ocr_result" in st.session_state:
    st.markdown(st.session_state["ocr_result"])
else:
    st.info("Upload an image and click 'Extract Text' to see the results here.")
# Footer
st.markdown("---")
st.markdown("Made using Vision Models via Groq API")