arjunanand13 committed on
Commit
7598edf
·
verified ·
1 Parent(s): b5ac2d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -65
app.py CHANGED
@@ -4,19 +4,11 @@ import requests
4
  import openai
5
  import gradio as gr
6
  import asyncio
7
- from langgraph import Graph, FunctionNode, RouterNode
8
  from gtts import gTTS
 
 
9
 
10
- def stt_agent(audio_path: str) -> str:
11
- """Convert speech to text using OpenAI Whisper API"""
12
- with open(audio_path, "rb") as afile:
13
- transcript = openai.audio.transcriptions.create(
14
- model="whisper-1",
15
- file=afile
16
- )
17
- return transcript.text.strip()
18
-
19
- # Load API keys from environment
20
  openai.api_key = os.getenv("OPENAI_API_KEY")
21
 
22
  # --- Business Logic Functions ---
@@ -24,24 +16,22 @@ def db_agent(query: str) -> str:
24
  try:
25
  conn = sqlite3.connect("shop.db")
26
  cur = conn.cursor()
27
- if "max revenue" in query.lower():
28
- cur.execute(
29
- """
30
- SELECT product, SUM(amount) AS revenue
31
- FROM transactions
32
- WHERE date = date('now')
33
- GROUP BY product
34
- ORDER BY revenue DESC
35
- LIMIT 1
36
- """
37
- )
38
- row = cur.fetchone()
39
- if row:
40
- return f"Top product today: {row[0]} with ₹{row[1]:,.2f}"
41
- return "No transactions found for today."
42
- return None
43
  except sqlite3.OperationalError as e:
44
- return f"Database error: {e}. Please initialize 'transactions' table in shop.db."
45
 
46
  def web_search_agent(query: str) -> str:
47
  try:
@@ -67,54 +57,80 @@ def llm_agent(query: str) -> str:
67
  )
68
  return response.choices[0].message.content.strip()
69
 
70
- # Text-to-Speech
 
 
 
 
 
 
71
 
72
  def tts_agent(text: str, lang: str = 'en') -> str:
73
- """Convert text to speech mp3 and return filepath"""
74
  tts = gTTS(text=text, lang=lang)
75
  out_path = "response_audio.mp3"
76
  tts.save(out_path)
77
  return out_path
78
 
79
- # --- LangGraph Multi-Agent Setup ---
80
- router_node = RouterNode(
81
- name="router",
82
- routes=[
83
- (lambda q: any(k in q.lower() for k in ["max revenue", "revenue"]), "db"),
84
- (lambda q: any(k in q.lower() for k in ["who", "what", "when", "where"]), "web"),
85
- (lambda q: True, "llm"),
86
- ]
87
- )
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
- db_node = FunctionNode(func=db_agent, name="db")
90
- web_node = FunctionNode(func=web_search_agent, name="web")
91
- llm_node = FunctionNode(func=llm_agent, name="llm")
92
-
93
- # Build Graph
94
- graph = Graph("shop-assistant")
95
- graph.add_nodes([router_node, db_node, web_node, llm_node])
96
- graph.add_edge("router", "db", condition=lambda r: r == "db")
97
- graph.add_edge("router", "web", condition=lambda r: r == "web")
98
- graph.add_edge("router", "llm", condition=lambda r: r == "llm")
99
-
100
- async def graph_handler(query: str) -> str:
101
- # If audio file path passed, convert to text first
102
- if query.startswith("audio://"):
103
- audio_path = query.replace("audio://", "")
104
- query = stt_agent(audio_path)
105
- text_resp = await graph.run(input=query, start_node="router")
106
- return text_resp
107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  def handle_query(audio_or_text: str):
109
- # Determine output type
110
  is_audio = audio_or_text.endswith('.wav') or audio_or_text.endswith('.mp3')
111
- text_input = f"audio://{audio_or_text}" if is_audio else audio_or_text
112
- text_resp = asyncio.run(graph_handler(text_input))
113
  if is_audio:
114
- # Return both text and audio
115
- audio_path = tts_agent(text_resp)
116
- return text_resp, audio_path
117
- return text_resp
 
 
 
 
 
 
 
118
 
119
  # --- Gradio UI ---
120
  with gr.Blocks() as demo:
@@ -123,7 +139,6 @@ with gr.Blocks() as demo:
123
  out_text = gr.Textbox(label="Answer (text)")
124
  out_audio = gr.Audio(label="Answer (speech)")
125
  submit = gr.Button("Submit")
126
- # Examples
127
  gr.Examples(
128
  examples=[
129
  ["What is the max revenue product today?"],
 
4
  import openai
5
  import gradio as gr
6
  import asyncio
 
7
  from gtts import gTTS
8
+ from typing_extensions import TypedDict
9
+ from langgraph.graph import StateGraph, START, END
10
 
11
+ # Load API keys
 
 
 
 
 
 
 
 
 
12
  openai.api_key = os.getenv("OPENAI_API_KEY")
13
 
14
  # --- Business Logic Functions ---
 
16
  try:
17
  conn = sqlite3.connect("shop.db")
18
  cur = conn.cursor()
19
+ cur.execute(
20
+ """
21
+ SELECT product, SUM(amount) AS revenue
22
+ FROM transactions
23
+ WHERE date = date('now')
24
+ GROUP BY product
25
+ ORDER BY revenue DESC
26
+ LIMIT 1
27
+ """
28
+ )
29
+ row = cur.fetchone()
30
+ if row:
31
+ return f"Top product today: {row[0]} with ₹{row[1]:,.2f}"
32
+ return "No transactions found for today."
 
 
33
  except sqlite3.OperationalError as e:
34
+ return f"Database error: {e}. Please initialize 'transactions' table in shop.db."
35
 
36
  def web_search_agent(query: str) -> str:
37
  try:
 
57
  )
58
  return response.choices[0].message.content.strip()
59
 
60
def stt_agent(audio_path: str) -> str:
    """Transcribe a local audio file to text via OpenAI's Whisper API.

    Args:
        audio_path: Path to the audio file to transcribe.

    Returns:
        The transcript text with surrounding whitespace stripped.
    """
    with open(audio_path, "rb") as audio_file:
        result = openai.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )
    return result.text.strip()
67
 
68
def tts_agent(text: str, lang: str = 'en') -> str:
    """Render *text* as speech and save it to an mp3 file.

    Args:
        text: The text to synthesize.
        lang: gTTS language code (defaults to 'en').

    Returns:
        The path of the generated mp3 file.
    """
    out_path = "response_audio.mp3"
    gTTS(text=text, lang=lang).save(out_path)
    return out_path
73
 
74
# --- LangGraph State and Nodes ---
class State(TypedDict):
    # query: the user's (possibly transcribed) question
    # result: the answer produced by whichever worker node ran
    query: str
    result: str

# Routing logic based on query
def route_fn(state: State) -> str:
    """Return the name of the worker node for this query.

    Revenue-related keywords go to the database node, generic
    question words to web search, and everything else to the LLM.
    """
    text = state["query"].lower()
    routes = (
        ("db", ("max revenue", "revenue")),
        ("web", ("who", "what", "when", "where")),
    )
    for destination, keywords in routes:
        if any(word in text for word in keywords):
            return destination
    return "llm"

# Node implementations

def router_node(state: State) -> dict:
    """Pass-through node; routing happens on its outgoing edges."""
    return {"query": state["query"]}

def db_node(state: State) -> dict:
    """Answer the query from the local transactions database."""
    return {"result": db_agent(state["query"])}

def web_node(state: State) -> dict:
    """Answer the query via web search."""
    return {"result": web_search_agent(state["query"])}

def llm_node(state: State) -> dict:
    """Answer the query with the general-purpose LLM."""
    return {"result": llm_agent(state["query"])}
101
# Build the LangGraph: START -> router -> (db | web | llm) -> END.
#
# NOTE(fix): the previous wiring called set_entry_point("router"),
# set_conditional_entry_point(route_fn, ...) AND add_edge(START, "router"),
# which attached START three conflicting ways and left the "router" node
# with no outgoing edges (a dead end) — the conditional entry point
# bypassed the router entirely. The correct pattern is a single
# unconditional START -> router edge plus conditional edges *out of*
# the router chosen by route_fn.
builder = StateGraph(State)
builder.add_node("router", router_node)
builder.add_node("db", db_node)
builder.add_node("web", web_node)
builder.add_node("llm", llm_node)
builder.add_edge(START, "router")
builder.add_conditional_edges(
    "router",
    route_fn,
    path_map={"db": "db", "web": "web", "llm": "llm"},
)
builder.add_edge("db", END)
builder.add_edge("web", END)
builder.add_edge("llm", END)
graph = builder.compile()
118
# Handler integrates STT/TTS and graph execution
def handle_query(audio_or_text: str):
    """Run one user query through the agent graph.

    Accepts either plain text or a path ending in .wav/.mp3. Audio
    input is transcribed before routing, and the answer is additionally
    rendered to speech, in which case a (text, audio_path) pair is
    returned; text input yields just the answer string.
    """
    is_audio = audio_or_text.endswith(('.wav', '.mp3'))
    query = stt_agent(audio_or_text) if is_audio else audio_or_text

    final_state = graph.invoke({"query": query})
    answer = final_state["result"]

    if not is_audio:
        return answer
    return answer, tts_agent(answer)
134
 
135
  # --- Gradio UI ---
136
  with gr.Blocks() as demo:
 
139
  out_text = gr.Textbox(label="Answer (text)")
140
  out_audio = gr.Audio(label="Answer (speech)")
141
  submit = gr.Button("Submit")
 
142
  gr.Examples(
143
  examples=[
144
  ["What is the max revenue product today?"],