BarBar288 committed
Commit 340f6f8 · verified · 1 parent: 8834327

Update app.py

Files changed (1)
app.py +14 -23
app.py CHANGED
@@ -32,22 +32,12 @@ text_to_speech_models = {
 conversational_tokenizers = {}
 conversational_models_loaded = {}
 
-for model_name, model_id in conversational_models.items():
-    conversational_tokenizers[model_name] = AutoTokenizer.from_pretrained(model_id)
-    conversational_models_loaded[model_name] = AutoModelForCausalLM.from_pretrained(model_id)
-
 # Initialize pipelines for Text-to-Image
 text_to_image_pipelines = {}
 
-for model_name, model_id in text_to_image_models.items():
-    text_to_image_pipelines[model_name] = StableDiffusionPipeline.from_pretrained(model_id)
-
 # Initialize pipelines for Text-to-Speech
 text_to_speech_pipelines = {}
 
-for model_name, model_id in text_to_speech_models.items():
-    text_to_speech_pipelines[model_name] = pipeline("text-to-speech", model=model_id)
-
 # Initialize pipelines for other tasks
 visual_qa_pipeline = pipeline("visual-question-answering", model="dandelin/vilt-b32-finetuned-vqa")
 document_qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
@@ -61,9 +51,16 @@ summarization_pipeline = pipeline("summarization", model="facebook/bart-large-cn
 text_to_audio_pipeline = pipeline("text-to-speech", model="julien-c/ljspeech_tts_train_tacotron2_raw_phn_tacotron_g2p_en_no_space")
 audio_classification_pipeline = pipeline("audio-classification", model="facebook/wav2vec2-base")
 
+def load_conversational_model(model_name):
+    if model_name not in conversational_models_loaded:
+        tokenizer = AutoTokenizer.from_pretrained(conversational_models[model_name])
+        model = AutoModelForCausalLM.from_pretrained(conversational_models[model_name])
+        conversational_tokenizers[model_name] = tokenizer
+        conversational_models_loaded[model_name] = model
+    return conversational_tokenizers[model_name], conversational_models_loaded[model_name]
+
 def chat(model_name, user_input, history=[]):
-    tokenizer = conversational_tokenizers[model_name]
-    model = conversational_models_loaded[model_name]
+    tokenizer, model = load_conversational_model(model_name)
 
     # Encode the input
     input_ids = tokenizer.encode(user_input + tokenizer.eos_token, return_tensors="pt")
@@ -83,11 +80,15 @@ def chat(model_name, user_input, history=[]):
     return history, history
 
 def generate_image(model_name, prompt):
+    if model_name not in text_to_image_pipelines:
+        text_to_image_pipelines[model_name] = StableDiffusionPipeline.from_pretrained(text_to_image_models[model_name])
     pipeline = text_to_image_pipelines[model_name]
     image = pipeline(prompt).images[0]
     return image
 
 def generate_speech(model_name, text):
+    if model_name not in text_to_speech_pipelines:
+        text_to_speech_pipelines[model_name] = pipeline("text-to-speech", model=text_to_speech_models[model_name])
     pipeline = text_to_speech_pipelines[model_name]
     audio = pipeline(text)
     return audio["audio"]
@@ -235,14 +236,4 @@ with gr.Blocks() as demo:
         text_to_audio_generate = gr.Button("Generate Audio")
         text_to_audio_output = gr.Audio(label="Generated Audio")
 
-        text_to_audio_generate.click(text_to_audio, inputs=text_to_audio_text, outputs=text_to_audio_output)
-
-    with gr.Tab("Audio Classification"):
-        audio_classification_audio = gr.Audio(label="Upload Audio")
-        audio_classification_generate = gr.Button("Classify")
-        audio_classification_output = gr.Textbox(label="Classification Result")
-
-        audio_classification_generate.click(audio_classification, inputs=audio_classification_audio, outputs=audio_classification_output)
-
-# Launch the demo
-demo.launch()
+        text_to_audio_generate.click(text_to_audio, inputs=text_to_audio_text, outputs=text_to_audio_output)
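Note on the new generate_speech: because the function body assigns to a local variable named pipeline, Python treats pipeline as local throughout the function, so the call to transformers' pipeline(...) on the cache-miss branch raises UnboundLocalError. A minimal corrected sketch, reusing app.py's module-level names; the tts local is ours, not part of the commit:

def generate_speech(model_name, text):
    # Lazily create and cache the TTS pipeline on first use.
    if model_name not in text_to_speech_pipelines:
        text_to_speech_pipelines[model_name] = pipeline(
            "text-to-speech", model=text_to_speech_models[model_name]
        )
    # Local name chosen so it no longer shadows transformers.pipeline.
    tts = text_to_speech_pipelines[model_name]
    audio = tts(text)
    return audio["audio"]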
 
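The change is the same in all three places: load a model or pipeline on the first request and cache it, instead of loading everything at import time. The pattern can also be written with functools.lru_cache instead of hand-maintained dicts; a minimal sketch assuming app.py's text_to_image_models mapping, with a helper name of our choosing:

from functools import lru_cache

from diffusers import StableDiffusionPipeline

@lru_cache(maxsize=None)
def get_text_to_image_pipeline(model_name):
    # The first call per model_name loads the weights; later calls return
    # the cached pipeline, matching the diff's check-then-store logic.
    return StableDiffusionPipeline.from_pretrained(text_to_image_models[model_name])

Neither variant guards against two concurrent requests loading the same model twice on a cold cache; if Gradio handlers run in parallel, wrapping the load in a threading.Lock closes that gap.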