# import os
# import logging
# import requests
# from fastapi import FastAPI, HTTPException
# from fastapi.responses import StreamingResponse
# from pydantic import BaseModel
# from openai import OpenAI

# logging.basicConfig(level=logging.INFO)
# logger = logging.getLogger(__name__)

# app = FastAPI()
# MODEL_NAME = 'llama3.1:8b'
# OLLAMA_URL = "http://localhost:11434"

# # def create_model(query):
# #     message = [
# #         {"role": "system", "content": "You are a general chat bot."},
# #         {"role": "user", "content": f"{query}"}
# #     ]
# #     completion = ollama_client.chat.completions.create(
# #         model="llama3.1:8b",
# #         messages=message
# #         # response_format=base_model,
# #         # temperature = 0.1
# #     )
# #     return completion
# class Question(BaseModel):
#     text: str

# @app.get("/")
# def read_root():
#     return {"Hello": f"Welcome to {MODEL_NAME} FastAPI"}

# # # POST endpoint to query the LLM
# # @app.post("/ask")
# # async def ask_question(question: Question):
# #     try:
# #         response = create_model(question.text)
# #         return {"response": response}
# #     except Exception as e:
# #         raise HTTPException(status_code=500, detail=f"Error querying the model: {str(e)}")

# @app.get("/list_models")
# async def list_models():
#     """List all available models in Ollama."""
#     try:
#         response = requests.get(f"{OLLAMA_URL}/api/tags")
#     except Exception as e:
#         return {"error": str(e)}
#     return response.json()

# @app.post("/pull_model")
# async def pull_model(model_name: str):
#     """Pull a model from Ollama's repository."""
#     response = requests.post(f"{OLLAMA_URL}/api/pull", json={"name": model_name})
#     # print(response)
#     return response.json()
# @app.post("/generate") | |
# async def generate_text(model: str, prompt: str, system: str = "You are a helpful AI assistant.", stream: bool = False): | |
# """Generate text from a given prompt using a specific model.""" | |
# try: | |
# response = requests.post( | |
# f"{OLLAMA_URL}/api/generate", | |
# json={"model": model, "prompt": prompt, "system": system, "stream": stream} | |
# ) | |
# except Exception as e: | |
# return {"error": str(e)} | |
# # print(response) | |
# return response.json() | |
# @app.post("/embed") | |
# async def get_embedding(model: str, text: str): | |
# """Generate embeddings for the given text using a model.""" | |
# try: | |
# response = requests.post( | |
# f"{OLLAMA_URL}/api/embeddings", | |
# json={"model": model, "prompt": text} | |
# ) | |
# except Exception as e: | |
# return {"error": str(e)} | |
# # print(response) | |
# return response.json() | |
# @app.post("/chat") | |
# async def chat(model: str, message: str, system: str = "You are a helpful chatbot."): | |
# """Chat with the model while maintaining context.""" | |
# try: | |
# response = requests.post( | |
# f"{OLLAMA_URL}/api/chat", | |
# json={"model": model, "messages": [{"role": "system", "content": system}, {"role": "user", "content": message}]} | |
# ) | |
# except Exception as e: | |
# return {"error": str(e)} | |
# # print(response) | |
# return response.json() | |
# @app.on_event("startup") | |
# async def startup_event(): | |
# logger.info(f"Starting up with model: {MODEL_NAME}") | |
# @app.on_event("shutdown") | |
# async def shutdown_event(): | |
# logger.info("Shutting down") | |
# from fastapi import FastAPI
# import httpx

# app = FastAPI()
# TARGET_SERVER = "http://localhost:11434"

# @app.get("/proxy/{path:path}")
# async def get_proxy_request(path: str):
#     async with httpx.AsyncClient() as client:
#         response = await client.get(f"{TARGET_SERVER}/{path}")
#         return response.json()

# @app.post("/proxy/{path:path}")
# async def post_proxy_request(path: str):
#     async with httpx.AsyncClient() as client:
#         response = await client.post(f"{TARGET_SERVER}/{path}")
#         return response.json()
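
# Active implementation: a thin async proxy (httpx) sitting in front of the Ollama server.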
from fastapi import FastAPI
import httpx
from pydantic import BaseModel
from OpenAIClasses import ChatCompletionRequest
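# ChatCompletionRequest is a project-local Pydantic model defined in OpenAIClasses.py;
# it is assumed here to mirror the OpenAI chat-completions request schema
# (model, messages, and related optional fields).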

class RequestBody(BaseModel):
    data: dict

app = FastAPI()
TARGET_SERVER = "http://localhost:11434"
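# 11434 is Ollama's default HTTP port.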

async def proxy_get(path: str):
    """Forwards GET requests to the target server."""
    async with httpx.AsyncClient() as client:
        response = await client.get(f"{TARGET_SERVER}/{path}")
        return response.json()
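
# Example (assuming the route registration sketched at the end of this file):
# GET /proxy/api/tags is forwarded to Ollama's /api/tags and returns the list
# of locally available models.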

async def proxy_post(path: str, request_body: RequestBody):
    """Forwards POST requests to the target server."""
    # print(request_body)
    # print(request_body.model_dump())
    try:
        data = request_body.model_dump()["data"]
        async with httpx.AsyncClient() as client:
            response = await client.post(f"{TARGET_SERVER}/{path}", json=data, timeout=120)
            return response.json()
    except Exception as e:
        print(e)
        return {"error": str(e)}

async def openai_compatible(request_body: ChatCompletionRequest):
    """Forwards an OpenAI-style chat request to Ollama's /api/chat and wraps the reply in a choices list."""
    try:
        data = request_body.model_dump()
        async with httpx.AsyncClient() as client:
            response = await client.post(f"{TARGET_SERVER}/api/chat", json=data, timeout=120)
            result = response.json()
            print(result)
            return {"choices": [result]}
    except Exception as e:
        print(e)
        return {"error": str(e)}