File size: 5,186 Bytes
e0f29bb
 
ccc745c
 
 
 
5bd65b0
ccc745c
 
 
e0f29bb
ccc745c
 
e0f29bb
ccc745c
 
 
 
c81aa98
6ac8934
e0f29bb
6ac8934
72298ff
6ac8934
7df20a1
56e4890
7df20a1
6ac8934
 
7df20a1
56e4890
7df20a1
6ac8934
 
7df20a1
56e4890
7df20a1
6ac8934
 
54c62fb
56e4890
e0f29bb
 
 
6ac8934
 
7df20a1
56e4890
7df20a1
e0f29bb
 
 
56e4890
7df20a1
72298ff
e0f29bb
 
 
56e4890
7df20a1
72298ff
e0f29bb
 
 
56e4890
7df20a1
72298ff
7df20a1
72298ff
e0f29bb
 
 
 
58aeeb9
54c62fb
6d24d35
e0f29bb
 
54c62fb
e0f29bb
 
 
7df20a1
6d24d35
72298ff
e0f29bb
58aeeb9
 
 
 
 
 
 
 
6d24d35
e0f29bb
58aeeb9
54c62fb
e0f29bb
 
de718ca
 
 
58aeeb9
 
 
 
 
 
 
de718ca
e0f29bb
72298ff
de718ca
e0f29bb
de718ca
0c02b02
 
 
 
 
e0f29bb
 
 
 
 
de718ca
58aeeb9
e0f29bb
 
54c62fb
e0f29bb
de718ca
7df20a1
6ac8934
e0f29bb
58aeeb9
e0f29bb
58aeeb9
6d24d35
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
"""LangGraph Agent"""
import os
from dotenv import load_dotenv
from langgraph.graph import START, StateGraph, MessagesState
from langgraph.prebuilt import tools_condition
from langgraph.prebuilt import ToolNode
from langchain_anthropic import ChatAnthropic
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_groq import ChatGroq
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.document_loaders import ArxivLoader
from langchain_community.vectorstores import SupabaseVectorStore
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
from langchain_core.tools import tool
from langchain.tools.retriever import create_retriever_tool
from supabase.client import Client, create_client
import re

load_dotenv()

# === Tools ===
@tool
def multiply(a: int, b: int) -> int:
    """Multiplies two integers and returns the result."""
    # NOTE(review): the docstring is presumably surfaced to the model as the
    # tool description by @tool, so its wording is left untouched.
    product = a * b
    return product

@tool
def add(a: int, b: int) -> int:
    """Adds two integers and returns the sum."""
    # Named intermediate keeps the returned quantity explicit.
    total = a + b
    return total

@tool
def subtract(a: int, b: int) -> int:
    """Subtracts the second integer from the first and returns the result."""
    # Operand order matters: the result is a minus b, never b minus a.
    difference = a - b
    return difference

@tool
def divide(a: int, b: int) -> float:
    """Divides the first integer by the second and returns the result as a float."""
    # Happy path first: true division, so the result is a float.
    if b != 0:
        return a / b
    # A zero divisor is reported as ValueError rather than ZeroDivisionError.
    raise ValueError("Cannot divide by zero.")

@tool
def modulus(a: int, b: int) -> int:
    """Returns the remainder of dividing the first integer by the second."""
    # No zero guard here: b == 0 propagates Python's ZeroDivisionError.
    remainder = a % b
    return remainder

@tool
def wiki_search(query: str) -> str:
    """Searches Wikipedia for a query and returns the top 2 results as a formatted string."""
    # Fetch at most two pages for the query.
    loader = WikipediaLoader(query=query, load_max_docs=2)
    pages = loader.load()
    # Page bodies are concatenated with a horizontal-rule style separator.
    separator = "\n\n---\n\n"
    return separator.join(page.page_content for page in pages)

@tool
def web_search(query: str) -> str:
    """Uses Tavily to search the web for a query and returns the top 3 result snippets."""
    # The tool input must be passed positionally: invoke() has no `query`
    # keyword, so `invoke(query=query)` fails before any search is made.
    results = TavilySearchResults(max_results=3).invoke(query)
    # On an API failure the tool may hand back an error string instead of a
    # result list; pass it through so the model can see what went wrong.
    if isinstance(results, str):
        return results
    # Each hit is a plain dict (snippet under "content"), not a Document,
    # so there is no `.page_content` attribute to read.
    snippets = [
        hit.get("content", "") if isinstance(hit, dict) else str(hit)
        for hit in results
    ]
    return "\n\n---\n\n".join(snippets)

@tool
def arvix_search(query: str) -> str:
    """Searches Arxiv for academic papers related to the query and returns the top 3 abstracts."""
    # Load up to three papers matching the query.
    loader = ArxivLoader(query=query, load_max_docs=3)
    papers = loader.load()
    # Only the first 1000 characters of each document are kept.
    excerpts = (paper.page_content[:1000] for paper in papers)
    separator = "\n\n---\n\n"
    return separator.join(excerpts)

# === System Prompt ===
# Read the agent's system prompt at import time. The path is relative, so it
# is resolved against the current working directory; a missing file raises
# here, before any graph is built.
with open("system_prompt.txt", "r", encoding="utf-8") as f:
    system_prompt = f.read()
# Wrapped once as a SystemMessage so the retriever node can reuse it.
sys_msg = SystemMessage(content=system_prompt)

# === Embeddings & Vector Store ===
# Embedding model handed to the vector store below.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
# Supabase credentials are read from the environment (load_dotenv() has already
# run above). os.getenv returns None for an unset variable, so missing
# configuration surfaces inside create_client rather than here.
supabase: Client = create_client(os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_SERVICE_KEY"))
# Vector store over the "Vector_Test" table, queried in the retriever node.
# NOTE(review): "match_documents_langchain" is presumably a function defined in
# the Supabase project — confirm it exists with that exact name.
vector_store = SupabaseVectorStore(
    client=supabase,
    embedding=embeddings,
    table_name="Vector_Test",
    query_name="match_documents_langchain",
)

# === Tool Registry ===
# Single list of every tool exposed to the agent: build_graph both binds it to
# the LLM and passes it to the ToolNode, so a tool added here reaches both.
tools = [multiply, add, subtract, divide, modulus, wiki_search, web_search, arvix_search]

# === LangGraph Builder ===
def build_graph(provider: str = "huggingface"):
    """Build and compile the LangGraph agent.

    Flow: ``retriever -> assistant``; while the model requests tool calls the
    graph loops ``assistant -> tools -> assistant``; once it answers without
    tool calls the reply goes through ``formatter`` and the run ends.

    Args:
        provider: LLM backend to use. Only ``"huggingface"`` is supported.

    Returns:
        The compiled graph produced by ``StateGraph.compile()``.

    Raises:
        ValueError: If ``provider`` is anything other than ``"huggingface"``.
    """
    # Local import: END is not among the module-level langgraph imports.
    from langgraph.graph import END

    # One constant for both the endpoint and the error text so the two cannot
    # drift apart (the message previously named a different model).
    repo_id = "Qwen/Qwen1.5-7B-Chat"

    if provider != "huggingface":
        raise ValueError(f"Only 'huggingface' ({repo_id}) is supported in this build.")

    llm = ChatHuggingFace(
        llm=HuggingFaceEndpoint(
            repo_id=repo_id,
            temperature=0,
        )
    )
    llm_with_tools = llm.bind_tools(tools)

    def retriever(state: MessagesState):
        """Add the system prompt and, when available, the closest stored document."""
        question = state["messages"][-1]
        similar = vector_store.similarity_search(question.content)
        messages = [sys_msg, question]
        # The store may return no match (e.g. an empty table); skip the
        # reference instead of failing on similar[0].
        if similar:
            messages.append(HumanMessage(content=f"Reference: {similar[0].page_content}"))
        return {"messages": messages}

    def assistant(state: MessagesState):
        """Call the tool-enabled model on the conversation so far."""
        response = llm_with_tools.invoke(state["messages"])
        # MessagesState merges updates into the existing history, so only the
        # new reply is returned rather than the whole list again.
        return {"messages": [response]}

    def formatter(state: MessagesState):
        """Reduce the last message to a bare single-line answer."""
        last = state["messages"][-1].content.strip()
        # Drop anything that looks like an inline <tag>.
        cleaned = re.sub(r"<.*?>", "", last)
        # Remove "Final Answer:" / "Answer:" labels, case-insensitively.
        cleaned = re.sub(r"(Final\s*Answer:|Answer:)", "", cleaned, flags=re.IGNORECASE)
        # Keep only the first line of what remains.
        cleaned = cleaned.strip().split("\n")[0].strip()
        return {"messages": [AIMessage(content=cleaned)]}

    builder = StateGraph(MessagesState)
    builder.add_node("retriever", retriever)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))
    builder.add_node("formatter", formatter)

    builder.add_edge(START, "retriever")
    builder.add_edge("retriever", "assistant")
    # tools_condition yields either "tools" or END. Sending its END branch to
    # the formatter replaces the former unconditional assistant -> formatter
    # edge, which also fired on tool-call turns and formatted half-finished
    # replies alongside the tool run.
    builder.add_conditional_edges(
        "assistant",
        tools_condition,
        {"tools": "tools", END: "formatter"},
    )
    builder.add_edge("tools", "assistant")
    builder.add_edge("formatter", END)

    return builder.compile()

# === Run Test ===
if __name__ == "__main__":
    # Smoke test: run one question through the default graph and print every
    # message left in the final state.
    agent = build_graph()
    question = HumanMessage(content="What is the capital of France?")
    final_state = agent.invoke({"messages": [question]})
    for message in final_state["messages"]:
        message.pretty_print()