tommytracx committed
Commit 42c727a · verified · 1 parent: 18bdf87

Upload 4 files

Files changed (4)
  1. app/agent.py +11 -0
  2. app/main.py +34 -0
  3. app/speech_to_text.py +9 -0
  4. app/text_to_speech.py +11 -0
app/agent.py ADDED
@@ -0,0 +1,11 @@
+ from models.local_llm import run_llm
+
+ conversation_memory = []
+
+ def process_text(input_text: str) -> str:
+     conversation_memory.append({"user": input_text})
+     context = "\n".join([f"User: {m['user']}" for m in conversation_memory if "user" in m])
+     prompt = f"You are a telecom AI assistant. Context:\n{context}\nRespond:"
+     response = run_llm(prompt)
+     conversation_memory.append({"assistant": response})
+     return response
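
One caveat in process_text: assistant replies are appended to conversation_memory but filtered out of the context string (without the "user" in m guard, the first {"assistant": ...} entry would raise a KeyError on the next request), so the model never sees its own prior answers. A minimal sketch of a context builder that interleaves both roles; build_context is a hypothetical helper, not part of this commit:

# Sketch: include assistant turns in the prompt context as well.
# Entries follow the {"user": ...} / {"assistant": ...} shape that
# process_text in app/agent.py appends to conversation_memory.
def build_context(memory: list) -> str:
    lines = []
    for turn in memory:
        if "user" in turn:
            lines.append(f"User: {turn['user']}")
        if "assistant" in turn:
            lines.append(f"Assistant: {turn['assistant']}")
    return "\n".join(lines)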
app/main.py ADDED
@@ -0,0 +1,34 @@
+ from fastapi import FastAPI, UploadFile, File, Request
+ from fastapi.middleware.cors import CORSMiddleware
+ from app.agent import process_text
+ from app.speech_to_text import transcribe_audio
+ from app.text_to_speech import synthesize_speech
+ from fastapi.responses import StreamingResponse, JSONResponse
+ import io
+
+ app = FastAPI()
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ @app.post("/transcribe")
+ async def transcribe(file: UploadFile = File(...)):
+     audio_bytes = await file.read()
+     text = transcribe_audio(audio_bytes)
+     return {"transcription": text}
+
+ @app.post("/query")
+ async def query_agent(request: Request):
+     data = await request.json()
+     input_text = data.get("input_text", "")
+     response = process_text(input_text)
+     return {"response": response}
+
+ @app.get("/speak")
+ def speak(text: str):
+     audio = synthesize_speech(text)
+     return StreamingResponse(io.BytesIO(audio), media_type="audio/mpeg")
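
Two details in the /speak route are easy to trip over: synthesize_speech calls asyncio.run, which raises a RuntimeError inside an already-running event loop, so the route is a plain def (FastAPI runs sync routes in a worker thread); and edge_tts emits MP3, hence the audio/mpeg media type. For reference, a sketch of a client exercising all three endpoints; the base URL, sample.wav, and the requests dependency are assumptions, not part of this commit:

# Sketch: exercise the three endpoints from a local client.
# Assumes `uvicorn app.main:app` serving on port 8000, a sample.wav
# on disk, and the requests package installed.
import requests

BASE = "http://localhost:8000"

# /transcribe expects a multipart file upload
with open("sample.wav", "rb") as f:
    r = requests.post(f"{BASE}/transcribe", files={"file": f})
print(r.json()["transcription"])

# /query expects JSON with an input_text field
r = requests.post(f"{BASE}/query", json={"input_text": "What does RSRP measure?"})
print(r.json()["response"])

# /speak streams MP3 bytes back for the given text
r = requests.get(f"{BASE}/speak", params={"text": "Hello from the agent"})
with open("reply.mp3", "wb") as out:
    out.write(r.content)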
app/speech_to_text.py ADDED
@@ -0,0 +1,9 @@
+ import whisper
+
+ model = whisper.load_model("base")
+
+ def transcribe_audio(audio_bytes):
+     with open("temp.wav", "wb") as f:
+         f.write(audio_bytes)
+     result = model.transcribe("temp.wav")
+     return result["text"]
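
transcribe_audio writes every upload to the same temp.wav, so two concurrent /transcribe requests can overwrite each other's audio before Whisper reads it. A sketch of a drop-in replacement using a unique temporary file per request; the tempfile handling is an assumption, not part of this commit:

# Sketch: per-request temp files so parallel uploads don't clobber each other.
import os
import tempfile

import whisper

model = whisper.load_model("base")

def transcribe_audio(audio_bytes: bytes) -> str:
    # Write the upload to a uniquely named temp file
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        f.write(audio_bytes)
        path = f.name
    try:
        result = model.transcribe(path)
    finally:
        os.remove(path)  # clean up even if transcription fails
    return result["text"]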
app/text_to_speech.py ADDED
@@ -0,0 +1,11 @@
+ import asyncio
+ import edge_tts
+
+ async def generate_tts(text: str):
+     communicate = edge_tts.Communicate(text, "en-US-JennyNeural")
+     await communicate.save("speech.mp3")
+     with open("speech.mp3", "rb") as f:
+         return f.read()
+
+ def synthesize_speech(text):
+     return asyncio.run(generate_tts(text))
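
generate_tts has the same shared-filename issue with speech.mp3, and callers already inside an event loop should await it directly rather than go through synthesize_speech. edge_tts also offers a streaming interface; a sketch that keeps the MP3 bytes in memory instead of touching disk (based on edge_tts's stream() API; treat the exact chunk layout as an assumption to verify against the installed version):

# Sketch: accumulate TTS audio in memory via edge_tts's streaming API,
# avoiding the shared speech.mp3 file. "audio" chunks carry the bytes;
# "WordBoundary" chunks carry timing metadata and are skipped here.
import edge_tts

async def generate_tts_in_memory(text: str, voice: str = "en-US-JennyNeural") -> bytes:
    communicate = edge_tts.Communicate(text, voice)
    audio = b""
    async for chunk in communicate.stream():
        if chunk["type"] == "audio":
            audio += chunk["data"]
    return audio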