# ---------------------------------------------------------------------------
# Legacy version (kept for reference, commented out): FastAPI endpoints that
# call a local Ollama server directly via `requests`.
# ---------------------------------------------------------------------------
# import os
# import logging
# import requests
# from fastapi import FastAPI, HTTPException
# from fastapi.responses import StreamingResponse
# from pydantic import BaseModel
# from openai import OpenAI

# logging.basicConfig(level=logging.INFO)
# logger = logging.getLogger(__name__)

# app = FastAPI()

# MODEL_NAME = 'llama3.1:8b'
# OLLAMA_URL = "http://localhost:11434"

# # def create_model(query):
# #     message = [
# #         {"role": "system", "content": "You are a general chat bot."},
# #         {"role": "user", "content": f"{query}"}
# #     ]
# #     completion = ollama_client.chat.completions.create(
# #         model="llama3.1:8b",
# #         messages=message
# #         # response_format=base_model,
# #         # temperature=0.1
# #     )
# #     return completion

# class Question(BaseModel):
#     text: str

# @app.get("/")
# def read_root():
#     return {"Hello": f"Welcome to {MODEL_NAME} FastAPI"}

# # POST endpoint to query the LLM
# # @app.post("/ask")
# # async def ask_question(question: Question):
# #     try:
# #         response = create_model(question.text)
# #         return {"response": response}
# #     except Exception as e:
# #         raise HTTPException(status_code=500, detail=f"Error querying the model: {str(e)}")

# @app.get("/list_models")
# async def list_models():
#     """List all models available in Ollama."""
#     try:
#         response = requests.get(f"{OLLAMA_URL}/api/tags")
#     except Exception as e:
#         return {"error": str(e)}
#     return response.json()

# @app.post("/pull_model")
# async def pull_model(model_name: str):
#     """Pull a model from the Ollama library."""
#     response = requests.post(f"{OLLAMA_URL}/api/pull", json={"name": model_name})
#     return response.json()

# @app.post("/generate")
# async def generate_text(model: str, prompt: str, system: str = "You are a helpful AI assistant.", stream: bool = False):
#     """Generate text from a given prompt using a specific model."""
#     try:
#         response = requests.post(
#             f"{OLLAMA_URL}/api/generate",
#             json={"model": model, "prompt": prompt, "system": system, "stream": stream}
#         )
#     except Exception as e:
#         return {"error": str(e)}
#     return response.json()

# @app.post("/embed")
# async def get_embedding(model: str, text: str):
#     """Generate embeddings for the given text using a model."""
#     try:
#         response = requests.post(
#             f"{OLLAMA_URL}/api/embeddings",
#             json={"model": model, "prompt": text}
#         )
#     except Exception as e:
#         return {"error": str(e)}
#     return response.json()

# @app.post("/chat")
# async def chat(model: str, message: str, system: str = "You are a helpful chatbot."):
#     """Chat with the model using a system prompt and a single user message."""
#     try:
#         response = requests.post(
#             f"{OLLAMA_URL}/api/chat",
#             json={"model": model, "messages": [{"role": "system", "content": system}, {"role": "user", "content": message}]}
#         )
#     except Exception as e:
#         return {"error": str(e)}
#     return response.json()

# @app.on_event("startup")
# async def startup_event():
#     logger.info(f"Starting up with model: {MODEL_NAME}")

# @app.on_event("shutdown")
# async def shutdown_event():
#     logger.info("Shutting down")
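# ---------------------------------------------------------------------------
# Active implementation: a thin async proxy in front of a local Ollama server,
# plus an OpenAI-style /chat/completions shim.
#
# ChatCompletionRequest is imported from the local OpenAIClasses module (not
# shown here). Judging by how it is forwarded to Ollama's /api/chat, it
# presumably mirrors OpenAI's request schema; a purely hypothetical sketch:
#
#     class ChatCompletionRequest(BaseModel):
#         model: str
#         messages: list[dict]
#         stream: bool = False
# ---------------------------------------------------------------------------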
client.post(f"{TARGET_SERVER}/{path}") # return response.json() from fastapi import FastAPI import httpx from pydantic import BaseModel from OpenAIClasses import ChatCompletionRequest class RequestBody(BaseModel): data: dict app = FastAPI() TARGET_SERVER = "http://localhost:11434" @app.get("/proxy/{path:path}") async def proxy_get(path: str): """ Forwards GET requests to the target server """ async with httpx.AsyncClient() as client: response = await client.get(f"{TARGET_SERVER}/{path}") return response.json() @app.post("/proxy/{path:path}") async def proxy_post(path: str, request_body: RequestBody): # print(request_body) # print(request_body.model_dump()) """ Forwards POST requests to the target server """ try: data = request_body.model_dump()["data"] async with httpx.AsyncClient() as client: response = await client.post(f"{TARGET_SERVER}/{path}", json=data, timeout=120) return response.json() except Exception as e: print(e) return {"error": str(e)} @app.post("/openai_compatible/chat/completions") async def openai_compatible(request_body: ChatCompletionRequest): """ Forwards POST requests to the target server """ try: data = request_body.model_dump() async with httpx.AsyncClient() as client: response = await client.post(f"{TARGET_SERVER}/api/chat", json=data, timeout=120) print(response.json()) return {"choices": [response.json()]} except Exception as e: print(e) return {"error": str(e)}