File size: 5,555 Bytes
e8c5d46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21759c1
e8c5d46
21759c1
e8c5d46
 
 
 
 
21759c1
e8c5d46
616db8e
ed40101
e8c5d46
 
 
 
 
 
 
 
 
 
 
21759c1
e8c5d46
21759c1
e8c5d46
 
 
 
 
 
 
 
 
 
 
21759c1
e8c5d46
5c21b0d
e8c5d46
 
 
616db8e
e8c5d46
 
 
 
616db8e
e8c5d46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c6bb0a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
faa03b1
e8c5d46
6ace0b8
aef7a8e
faa03b1
6ace0b8
 
e8c5d46
 
 
 
6ace0b8
c6bb0a1
 
e8c5d46
 
32db087
 
6ace0b8
 
8534784
 
c6bb0a1
faa03b1
 
7c0fa7a
faa03b1
7c0fa7a
faa03b1
 
 
 
aef7a8e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
faa03b1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# import os
# import logging
# import requests
# from fastapi import FastAPI, HTTPException
# from fastapi.responses import StreamingResponse
# from pydantic import BaseModel
# from openai import OpenAI

# logging.basicConfig(level=logging.INFO)
# logger = logging.getLogger(__name__)

# app = FastAPI()
# MODEL_NAME = 'llama3.1:8b'

# OLLAMA_URL = "http://localhost:11434"


# # def create_model(query):
# #     message = [
# #         {"role": "system", "content": "You are a general chat bot."},
# #         {"role": "user", "content": f"{query}"}
# #     ]

# #     completion = ollama_client.chat.completions.create(
# #         model="llama3.1:8b",
# #         messages=message
# #         # response_format=base_model,
# #         #temperature = 0.1
# #     )
# #     return completion

# class Question(BaseModel):
#     text: str

# @app.get("/")
# def read_root():
#     return {"Hello": f"Welcome to {MODEL_NAME} FastAPI"}


# # # POST endpoint to query the LLM
# # @app.post("/ask")
# # async def ask_question(question: Question):
# #     try:
# #         response = create_model(question.text)
# #         return {"response": response}
# #     except Exception as e:
# #         raise HTTPException(status_code=500, detail=f"Error querying the model: {str(e)}")


# @app.get("/list_models")
# async def list_models():
#     """List all available models in Ollama."""
#     try:
#         response = requests.get(f"{OLLAMA_URL}/api/tags")
#     except Exception as e:
#         return {"error": str(e)}

#     return response.json()

# @app.post("/pull_model")
# async def pull_model(model_name: str):
#     """Pull a model from Ollama's repository."""
#     response = requests.post(f"{OLLAMA_URL}/api/pull", json={"name": model_name})
#     # print(response)

#     return response.json()


# @app.post("/generate")
# async def generate_text(model: str, prompt: str, system: str = "You are a helpful AI assistant.", stream: bool = False):
#     """Generate text from a given prompt using a specific model."""
#     try:
#         response =  requests.post(
#             f"{OLLAMA_URL}/api/generate",
#             json={"model": model, "prompt": prompt, "system": system, "stream": stream}
#         )
#     except Exception as e:
#         return {"error": str(e)}
#     # print(response)

#     return response.json()

# @app.post("/embed")
# async def get_embedding(model: str, text: str):
#     """Generate embeddings for the given text using a model."""
#     try:
#         response = requests.post(
#             f"{OLLAMA_URL}/api/embeddings",
#             json={"model": model, "prompt": text}
#         )
#     except Exception as e:
#         return {"error": str(e)}
#     # print(response)

#     return response.json()

# @app.post("/chat")
# async def chat(model: str, message: str, system: str = "You are a helpful chatbot."):
#     """Chat with the model while maintaining context."""
#     try:
#         response = requests.post(
#             f"{OLLAMA_URL}/api/chat",
#             json={"model": model, "messages": [{"role": "system", "content": system}, {"role": "user", "content": message}]}
#         )
#     except Exception as e:
#         return {"error": str(e)}
#     # print(response)

#     return response.json()



# @app.on_event("startup")
# async def startup_event():
#     logger.info(f"Starting up with model: {MODEL_NAME}")

# @app.on_event("shutdown")
# async def shutdown_event():
#     logger.info("Shutting down")



# from fastapi import FastAPI
# import httpx

# app = FastAPI()
# TARGET_SERVER = "http://localhost:11434"

# @app.get("/proxy/{path:path}")
# async def get_proxy_request(path: str):
#     async with httpx.AsyncClient() as client:
#         response = await client.get(f"{TARGET_SERVER}/{path}")
#     return response.json()

# @app.post("/proxy/{path:path}")
# async def post_proxy_request(path: str):
#     async with httpx.AsyncClient() as client:
#         response = await client.post(f"{TARGET_SERVER}/{path}")
#     return response.json()


from fastapi import FastAPI
import httpx
from pydantic import BaseModel
from OpenAIClasses import ChatCompletionRequest

# Request envelope accepted by the POST proxy route: the incoming JSON body
# must carry a "data" key whose value is a JSON object.
class RequestBody(BaseModel):
    # Payload that the POST proxy forwards as the JSON body of the upstream call.
    data: dict

# Base URL of the upstream server that every route in this module forwards to.
TARGET_SERVER = "http://localhost:11434"

# Application object on which the route decorators below register handlers.
app = FastAPI()

@app.get("/proxy/{path:path}")
async def proxy_get(path: str):
    """Forward a GET request to the target server and relay its JSON body.

    Args:
        path: Everything after ``/proxy/`` in the incoming URL; it is appended
            to ``TARGET_SERVER`` to build the upstream URL.

    Returns:
        The upstream response decoded as JSON, or ``{"error": <message>}`` when
        the upstream call or the JSON decoding fails.

    Note:
        Only the path is forwarded; query-string parameters of the incoming
        request are not passed on to the target server.
    """
    try:
        async with httpx.AsyncClient() as client:
            # Use the same 120 s budget as proxy_post instead of httpx's
            # 5 s default, so both proxy routes time out consistently.
            response = await client.get(f"{TARGET_SERVER}/{path}", timeout=120)
        return response.json()
    except Exception as e:
        # Mirror proxy_post: log the failure and report it in the response
        # body rather than letting it surface as an unhandled 500.
        print(e)
        return {"error": str(e)}

@app.post("/proxy/{path:path}")
async def proxy_post(path: str, request_body: RequestBody):
    """Relay a POST to the target server, sending the wrapped ``data`` as JSON.

    Args:
        path: Everything after ``/proxy/`` in the incoming URL; appended to
            ``TARGET_SERVER`` to form the upstream URL.
        request_body: Envelope whose ``data`` dict becomes the upstream JSON
            body.

    Returns:
        The upstream response decoded as JSON, or ``{"error": <message>}`` if
        anything in the forwarding step raises.
    """
    try:
        # Unwrap the envelope: only the inner "data" dict goes upstream.
        envelope = request_body.model_dump()
        payload = envelope["data"]

        async with httpx.AsyncClient() as session:
            upstream = await session.post(
                f"{TARGET_SERVER}/{path}",
                json=payload,
                timeout=120,
            )
        return upstream.json()
    except Exception as exc:
        # Best-effort proxy: log the failure and report it in the body.
        print(exc)
        return {"error": str(exc)}


@app.post("/openai_compatible/chat/completions")
async def openai_compatible(request_body: ChatCompletionRequest):
    """Forward a chat-completion request to the target server's ``/api/chat``.

    The request model is dumped to a dict and posted as JSON; the decoded
    upstream reply is returned as the single element of a ``choices`` list.

    Args:
        request_body: Parsed chat-completion request; its ``model_dump()``
            output is sent upstream unchanged.

    Returns:
        ``{"choices": [<upstream JSON>]}`` on success, or
        ``{"error": <message>}`` if the upstream call or JSON decoding fails.
    """
    try:
        data = request_body.model_dump()

        async with httpx.AsyncClient() as client:
            response = await client.post(f"{TARGET_SERVER}/api/chat", json=data, timeout=120)
        # Decode the body once and reuse it for both the log line and the
        # reply instead of parsing the same payload twice.
        # NOTE(review): .json() expects a single JSON document; if the upstream
        # streams several JSON lines this raises and falls into the error
        # branch -- confirm the request disables streaming.
        payload = response.json()
        print(payload)
        return {"choices": [payload]}
    except Exception as e:
        print(e)
        return {"error": str(e)}