Spaces:
Runtime error
Runtime error
File size: 1,861 Bytes
79d2f90 4051b30 975b4c6 79d2f90 73cb49c 79d2f90 254414f 4051b30 3748363 79d2f90 73cb49c 975b4c6 73cb49c 975b4c6 73cb49c 975b4c6 79d2f90 73cb49c 79d2f90 73cb49c 79d2f90 73cb49c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
from flask import Flask, request, jsonify
from transformers import pipeline, BlipForConditionalGeneration, BlipProcessor, utils
import torchaudio
from torchaudio.transforms import Resample
import torch
from io import BytesIO
from PIL import Image
from flask_cors import CORS
# Move the cache to the correct location (translated from the original Thai
# comment). NOTE(review): presumably this migrates the Hugging Face cache
# layout before any model is loaded -- confirm against the transformers docs.
utils.move_cache()

# Flask application object; CORS is applied to it with no extra options.
app = Flask(__name__)
CORS(app)

# Initialize TTS model from Hugging Face.
# The pipeline is built once at import time and reused by generate_caption.
tts_model_name = "suno/bark"
tts = pipeline(task="text-to-speech", model=tts_model_name)

# Initialize Blip model for image captioning.
# The model and its processor are loaded from the same checkpoint id.
model_id = "dblasko/blip-dalle3-img2prompt"
blip_model = BlipForConditionalGeneration.from_pretrained(model_id)
blip_processor = BlipProcessor.from_pretrained(model_id)
def generate_caption(file):
    """Caption an image with the BLIP model and speak the caption via TTS.

    Args:
        file: A filesystem path or a binary file-like object that
            ``PIL.Image.open`` can read (the upload route passes the
            uploaded file object).

    Returns:
        A ``(generated_caption, audio_path)`` tuple: the caption text and the
        path of the WAV file the synthesized speech was written to.

    Note:
        The audio is always written to the same fixed path, so each call
        overwrites the output of the previous one.
    """
    # The BLIP processor receives decoded images through ``images``; the
    # previous ``files=`` keyword was not an image input, so no pixel values
    # were ever produced.
    image = Image.open(file).convert("RGB")
    inputs = blip_processor(images=image, return_tensors="pt")

    # Sampling settings belong to generation (they have no meaning when
    # decoding token ids) and only take effect when sampling is enabled.
    generated_ids = blip_model.generate(
        pixel_values=inputs.pixel_values,
        max_length=50,
        do_sample=True,
        temperature=0.8,
        top_k=40,
        top_p=0.9,
    )
    generated_caption = blip_processor.batch_decode(
        generated_ids, skip_special_tokens=True
    )[0]

    # Use TTS model to convert generated caption to audio. The pipeline
    # returns a mapping with "audio" and "sampling_rate" keys, so the
    # waveform must be read by key rather than by position.
    audio_output = tts(generated_caption)
    waveform = torch.as_tensor(audio_output["audio"], dtype=torch.float32)
    if waveform.dim() == 1:
        # torchaudio.save expects a 2-D (channels, samples) tensor.
        waveform = waveform.unsqueeze(0)

    audio_path = "generated_audio_resampled.wav"
    torchaudio.save(audio_path, waveform, audio_output["sampling_rate"])
    return generated_caption, audio_path
@app.route('/upload', methods=['POST'])
def upload_image():
    """Handle ``POST /upload``: caption the uploaded image and synthesize audio.

    Expects a multipart form upload with the image in the ``file`` field.

    Returns:
        A JSON response with ``generated_caption`` and ``audio_url`` and
        status 200, or a JSON ``error`` message with status 400 when no image
        was supplied.
    """
    image_file = request.files.get('file')
    # A form submitted without choosing a file still carries a 'file' part,
    # but with an empty filename; treat that the same as a missing part
    # instead of handing an empty upload to the captioning model.
    if image_file is None or not image_file.filename:
        return jsonify({'error': 'No image provided'}), 400

    generated_caption, audio_path = generate_caption(image_file)
    return jsonify({'generated_caption': generated_caption, 'audio_url': audio_path}), 200
if __name__ == '__main__':
    import os

    # Debug mode enables Werkzeug's interactive debugger (which can execute
    # arbitrary code for anyone who reaches it) and the auto-reloader (which
    # re-runs this module in a child process, so the models are loaded twice).
    # Keep it opt-in through FLASK_DEBUG instead of hard-coding it on.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in {'1', 'true', 'yes', 'on'}
    app.run(port=5000, debug=debug_enabled)
|