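# Minimal Gradio Space: send a text prompt to Gemma 3 (google/gemma-3-12b-it by
# default) through transformers and return the generated reply.
# The large commented-out block below appears to be an earlier local experiment
# that called the same model through an Ollama server; it is kept for reference only.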
from transformers import AutoProcessor, Gemma3ForConditionalGeneration
import gradio as gr
# from PIL import Image
# import requests
# import torch
# import os
# from transformers import Gemma3ForConditionalGeneration, AutoProcessor
# print("hey")
# # Set the cache directory
# cache_dir = "F:\\huggingface_cache"
# # Set environment variables for good measure
# # os.environ["TRANSFORMERS_CACHE"] = cache_dir
# # os.environ["HF_HOME"] = cache_dir
# # os.environ["HUGGINGFACE_HUB_CACHE"] = cache_dir
# # Model ID
# model_id = "gemma3:latest"
# from ollama import chat
# from ollama import ChatResponse
# def _get_response(message):
#     messages = [
#         {
#             'role': 'user',
#             'content': message,
#         },
#     ]
#     response: ChatResponse = chat(model=model_id, messages=messages)
#     return response.message.content
# import requests
# import base64
# # Function to encode image to Base64
# def encode_image_to_base64(image_path):
#     with open(image_path, "rb") as image_file:
#         return base64.b64encode(image_file.read()).decode("utf-8")
# def image_process():
#     image_path = r"F:\HF\gemma-examples\WhatsApp Image 2025-03-21 at 10.05.06 PM.jpeg"  # Replace with your image path
#     # Encode the image
#     image_base64 = encode_image_to_base64(image_path)
#     # Ollama API endpoint
#     OLLAMA_URL = "http://localhost:11434/api/generate"
#     # Payload for the API request
#     payload = {
#         "model": model_id,  # Specify the model version
#         "prompt": "Given image is a handwritten text in english language, read it carefully and extract all the text mentioned in it.",
#         "images": [image_base64],  # List of Base64-encoded images
#         "stream": False
#     }
#     # Headers for the request
#     headers = {
#         "Content-Type": "application/json"
#     }
#     # Send the POST request
#     response = requests.post(OLLAMA_URL, json=payload, headers=headers)
#     # Check the response
#     if response.status_code == 200:
#         data = response.json()
#         print("Response from Gemma 3:")
#         print(data.get("response", "No response field in the API response."))
#     else:
#         print(f"Error: {response.status_code}")
#         print(response.text)
#     return response.text
# # Path to your image
# def _hit_endpoint(name):
#     import requests
#     import json
#     # Define the URL of the Ollama server
#     OLLAMA_URL = "http://localhost:11434/api/generate"
#     # Define the request payload
#     payload = {
#         "model": model_id,  # Change this to your desired model
#         "prompt": name,
#         "stream": False
#     }
#     # Make the request
#     response = requests.post(OLLAMA_URL, json=payload)
#     # Parse and print the response
#     if response.status_code == 200:
#         data = response.json()
#         print(data["response"])  # Extracting the generated text
#         return data["response"]
#     else:
#         print(f"Error: {response.status_code} - {response.text}")
#         return "An error occurred!"
import os

import torch
from huggingface_hub import login
from transformers import TextIteratorStreamer

# Authenticate with the Hub (Gemma checkpoints are gated); the token is read
# from the Space secret/environment variable "hf_token".
login(token=os.getenv("hf_token"))

model_id = os.getenv("MODEL_ID", "google/gemma-3-12b-it")

processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
model = Gemma3ForConditionalGeneration.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    attn_implementation="eager",
    cache_dir="F:\\huggingface_cache",  # local Windows cache path from development; safe to drop when deploying
)
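# If the default 12B checkpoint does not fit in GPU memory, one option (a sketch
# only, not used by this Space, and assuming the `bitsandbytes` package is
# installed) is 4-bit loading via BitsAndBytesConfig:
# from transformers import BitsAndBytesConfig
# model = Gemma3ForConditionalGeneration.from_pretrained(
#     model_id,
#     device_map="auto",
#     quantization_config=BitsAndBytesConfig(
#         load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16
#     ),
# )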
def run_fn(message):
    """Generate a reply to a plain-text message with Gemma 3."""
    # The chat template also accepts multimodal turns, e.g.:
    # conversation = [
    #     {
    #         "role": "user",
    #         "content": [
    #             {"type": "image", "image": "https://www.ilankelman.org/stopsigns/australia.jpg"},
    #             {"type": "text", "text": "Please describe this image in detail."},
    #         ],
    #     },
    # ]
    messages_list = [
        {"role": "user", "content": [{"type": "text", "text": message}]}
    ]

    inputs = processor.apply_chat_template(
        messages_list,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(device=model.device, dtype=torch.bfloat16)

    max_new_tokens = 100
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # Decode only the newly generated tokens (everything after the prompt),
    # so the interface returns text rather than a raw tensor of token ids.
    generated_ids = outputs[0][inputs["input_ids"].shape[-1]:]
    return processor.decode(generated_ids, skip_special_tokens=True)
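# The TextIteratorStreamer imported above is meant for incremental output.
# run_fn returns the full reply at once; the generator below is one possible
# streaming variant (a sketch, untested here): generation runs in a background
# thread while the streamer yields decoded text chunks that Gradio can display
# progressively when the interface function is a generator.
def run_fn_streaming(message):
    from threading import Thread

    messages_list = [
        {"role": "user", "content": [{"type": "text", "text": message}]}
    ]
    inputs = processor.apply_chat_template(
        messages_list,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(device=model.device, dtype=torch.bfloat16)

    streamer = TextIteratorStreamer(
        processor.tokenizer, timeout=30.0, skip_prompt=True, skip_special_tokens=True
    )
    thread = Thread(
        target=model.generate,
        kwargs=dict(inputs, streamer=streamer, max_new_tokens=100),
    )
    thread.start()

    partial = ""
    for chunk in streamer:
        partial += chunk
        yield partial

# To try it: demo = gr.Interface(fn=run_fn_streaming, inputs="text", outputs="text")
# (older Gradio versions may also need demo.queue() before launch).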
def greet(name):
    return run_fn(name)

demo = gr.Interface(fn=greet, inputs="text", outputs="text")
demo.launch()