File size: 5,555 Bytes
e8c5d46 21759c1 e8c5d46 21759c1 e8c5d46 21759c1 e8c5d46 616db8e ed40101 e8c5d46 21759c1 e8c5d46 21759c1 e8c5d46 21759c1 e8c5d46 5c21b0d e8c5d46 616db8e e8c5d46 616db8e e8c5d46 c6bb0a1 faa03b1 e8c5d46 6ace0b8 aef7a8e faa03b1 6ace0b8 e8c5d46 6ace0b8 c6bb0a1 e8c5d46 32db087 6ace0b8 8534784 c6bb0a1 faa03b1 7c0fa7a faa03b1 7c0fa7a faa03b1 aef7a8e faa03b1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 |
# import os
# import logging
# import requests
# from fastapi import FastAPI, HTTPException
# from fastapi.responses import StreamingResponse
# from pydantic import BaseModel
# from openai import OpenAI
# logging.basicConfig(level=logging.INFO)
# logger = logging.getLogger(__name__)
# app = FastAPI()
# MODEL_NAME = 'llama3.1:8b'
# OLLAMA_URL = "http://localhost:11434"
# # def create_model(query):
# # message = [
# # {"role": "system", "content": "You are a general chat bot."},
# # {"role": "user", "content": f"{query}"}
# # ]
# # completion = ollama_client.chat.completions.create(
# # model="llama3.1:8b",
# # messages=message
# # # response_format=base_model,
# # #temperature = 0.1
# # )
# # return completion
# class Question(BaseModel):
# text: str
# @app.get("/")
# def read_root():
# return {"Hello": f"Welcome to {MODEL_NAME} FastAPI"}
# # # POST endpoint to query the LLM
# # @app.post("/ask")
# # async def ask_question(question: Question):
# # try:
# # response = create_model(question.text)
# # return {"response": response}
# # except Exception as e:
# # raise HTTPException(status_code=500, detail=f"Error querying the model: {str(e)}")
# @app.get("/list_models")
# async def list_models():
# """List all available models in Ollama."""
# try:
# response = requests.get(f"{OLLAMA_URL}/api/tags")
# except Exception as e:
# return {"error": str(e)}
# return response.json()
# @app.post("/pull_model")
# async def pull_model(model_name: str):
# """Pull a model from Ollama's repository."""
# response = requests.post(f"{OLLAMA_URL}/api/pull", json={"name": model_name})
# # print(response)
# return response.json()
# @app.post("/generate")
# async def generate_text(model: str, prompt: str, system: str = "You are a helpful AI assistant.", stream: bool = False):
# """Generate text from a given prompt using a specific model."""
# try:
# response = requests.post(
# f"{OLLAMA_URL}/api/generate",
# json={"model": model, "prompt": prompt, "system": system, "stream": stream}
# )
# except Exception as e:
# return {"error": str(e)}
# # print(response)
# return response.json()
# @app.post("/embed")
# async def get_embedding(model: str, text: str):
# """Generate embeddings for the given text using a model."""
# try:
# response = requests.post(
# f"{OLLAMA_URL}/api/embeddings",
# json={"model": model, "prompt": text}
# )
# except Exception as e:
# return {"error": str(e)}
# # print(response)
# return response.json()
# @app.post("/chat")
# async def chat(model: str, message: str, system: str = "You are a helpful chatbot."):
# """Chat with the model while maintaining context."""
# try:
# response = requests.post(
# f"{OLLAMA_URL}/api/chat",
# json={"model": model, "messages": [{"role": "system", "content": system}, {"role": "user", "content": message}]}
# )
# except Exception as e:
# return {"error": str(e)}
# # print(response)
# return response.json()
# @app.on_event("startup")
# async def startup_event():
# logger.info(f"Starting up with model: {MODEL_NAME}")
# @app.on_event("shutdown")
# async def shutdown_event():
# logger.info("Shutting down")
# from fastapi import FastAPI
# import httpx
# app = FastAPI()
# TARGET_SERVER = "http://localhost:11434"
# @app.get("/proxy/{path:path}")
# async def get_proxy_request(path: str):
# async with httpx.AsyncClient() as client:
# response = await client.get(f"{TARGET_SERVER}/{path}")
# return response.json()
# @app.post("/proxy/{path:path}")
# async def post_proxy_request(path: str):
# async with httpx.AsyncClient() as client:
# response = await client.post(f"{TARGET_SERVER}/{path}")
# return response.json()
from fastapi import FastAPI
import httpx
from pydantic import BaseModel
from OpenAIClasses import ChatCompletionRequest
class RequestBody(BaseModel):
    """Envelope accepted by the generic POST proxy endpoint.

    The JSON object that should be forwarded upstream is carried under the
    ``data`` key of the incoming request body.
    """

    # Arbitrary JSON object; proxy_post sends it as the upstream request body.
    data: dict
# ASGI application object; the route decorators below register handlers on it.
app = FastAPI()
# Base URL of the upstream server that every endpoint in this module forwards to.
# NOTE(review): this address matches OLLAMA_URL in the commented-out revision
# earlier in the file, so it is presumably a local Ollama instance — confirm.
TARGET_SERVER = "http://localhost:11434"
@app.get("/proxy/{path:path}")
async def proxy_get(path: str):
    """Forward a GET request to the target server and relay its JSON reply.

    Only the path is forwarded; query parameters of the incoming request are
    not passed on to the upstream server.

    Args:
        path: Everything after ``/proxy/`` in the request URL. It is appended
            to ``TARGET_SERVER`` to build the upstream URL.

    Returns:
        The upstream response body decoded as JSON, or
        ``{"error": <message>}`` when the upstream call or the JSON decoding
        fails.
    """
    try:
        async with httpx.AsyncClient() as client:
            # Use the same generous timeout as the POST endpoints so a slow
            # upstream is treated identically regardless of HTTP method.
            response = await client.get(f"{TARGET_SERVER}/{path}", timeout=120)
        return response.json()
    except Exception as e:
        # Mirror the POST endpoints: report the failure in the response body
        # instead of letting it surface as an unhandled server error.
        print(e)
        return {"error": str(e)}
@app.post("/proxy/{path:path}")
async def proxy_post(path: str, request_body: RequestBody):
    """Forward a POST request to the target server and relay its JSON reply.

    Args:
        path: Everything after ``/proxy/`` in the request URL. It is appended
            to ``TARGET_SERVER`` to build the upstream URL.
        request_body: Envelope whose ``data`` object is sent upstream as the
            JSON body.

    Returns:
        The upstream response body decoded as JSON, or
        ``{"error": <message>}`` if any step raises.
    """
    try:
        # Unwrap the envelope: only the inner "data" object goes upstream.
        payload = request_body.model_dump()["data"]
        upstream_url = f"{TARGET_SERVER}/{path}"
        async with httpx.AsyncClient() as http:
            upstream = await http.post(upstream_url, json=payload, timeout=120)
            return upstream.json()
    except Exception as exc:
        # Failures are reported in the response body rather than re-raised.
        print(exc)
        return {"error": str(exc)}
@app.post("/openai_compatible/chat/completions")
async def openai_compatible(request_body: ChatCompletionRequest):
    """Send a chat-completion request to the target server's ``/api/chat``.

    The validated request model is dumped to a dict and posted upstream
    unchanged; the upstream JSON reply is wrapped in a one-element
    ``choices`` list.

    Args:
        request_body: Chat-completion request; its ``model_dump()`` output is
            used verbatim as the upstream JSON body.

    Returns:
        ``{"choices": [<upstream JSON>]}`` on success, or
        ``{"error": <message>}`` if the upstream call or JSON decoding fails.
    """
    try:
        data = request_body.model_dump()
        # NOTE(review): Ollama's /api/chat streams newline-delimited JSON
        # unless the request sets "stream" to false, in which case the
        # .json() call below would fail — confirm what ChatCompletionRequest
        # sends for that field.
        async with httpx.AsyncClient() as client:
            response = await client.post(f"{TARGET_SERVER}/api/chat", json=data, timeout=120)
        # Decode the body once and reuse it for both the debug print and the
        # returned payload instead of parsing the same JSON twice.
        payload = response.json()
        print(payload)
        return {"choices": [payload]}
    except Exception as e:
        # Failures are reported in the response body rather than re-raised.
        print(e)
        return {"error": str(e)}
|