pradeepsengarr commited on
Commit
ea38126
·
verified ·
1 Parent(s): 4a35eda

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -0
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Agentic AI Bot using Mistral-7B-Instruct-v0.1 with Text & Speech (Streamlit App)
2
+
3
+ import streamlit as st
4
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
5
+ from gtts import gTTS
6
+ import tempfile
7
+ import os
8
+ import torch
9
+ import speech_recognition as sr
10
+ import soundfile as sf
11
+ import io
12
+
13
+ st.set_page_config(page_title="🧠 Agentic AI Bot", layout="centered")
14
+ st.title("🎙️ Agentic AI Assistant (Text + Voice)")
15
+
16
+ # Load model and tokenizer
17
+ @st.cache_resource
18
+ def load_model():
19
+ tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
20
+ model = AutoModelForCausalLM.from_pretrained(
21
+ "mistralai/Mistral-7B-Instruct-v0.1",
22
+ device_map="auto",
23
+ torch_dtype=torch.float16,
24
+ load_in_4bit=True
25
+ )
26
+ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
27
+ return pipe
28
+
29
+ pipe = load_model()
30
+
31
+ # Text-to-speech function
32
+ def speak(text):
33
+ tts = gTTS(text)
34
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
35
+ tts.save(fp.name)
36
+ return fp.name
37
+
38
+ # Speech-to-text function
39
+ def transcribe(audio_bytes):
40
+ recognizer = sr.Recognizer()
41
+ with sr.AudioFile(io.BytesIO(audio_bytes)) as source:
42
+ audio_data = recognizer.record(source)
43
+ try:
44
+ return recognizer.recognize_google(audio_data)
45
+ except sr.UnknownValueError:
46
+ return "Sorry, I could not understand the audio."
47
+ except sr.RequestError:
48
+ return "Speech recognition service is unavailable."
49
+
50
+ # Input mode selection
51
+ mode = st.radio("Choose input mode:", ["Text", "Voice"])
52
+
53
+ user_input = ""
54
+ if mode == "Text":
55
+ user_input = st.text_input("Enter your query:")
56
+ else:
57
+ audio = st.file_uploader("Upload your voice (WAV format only)", type=["wav"])
58
+ if audio is not None:
59
+ audio_bytes = audio.read()
60
+ user_input = transcribe(audio_bytes)
61
+ st.write(f"You said: {user_input}")
62
+
63
+ # Run the assistant
64
+ if user_input:
65
+ with st.spinner("Thinking..."):
66
+ result = pipe(user_input, max_new_tokens=200, temperature=0.7, do_sample=True)
67
+ response = result[0]['generated_text']
68
+
69
+ # Trim prompt from response if repeated
70
+ if response.lower().startswith(user_input.lower()):
71
+ response = response[len(user_input):].strip()
72
+
73
+ st.subheader("🤖 Assistant's Response:")
74
+ st.write(response)
75
+
76
+ # Speak response
77
+ audio_path = speak(response)
78
+ st.audio(audio_path, format="audio/mp3")
79
+
80
+ # Cleanup
81
+ os.remove(audio_path)