Spaces:

aioverlords-amnil
/

internal-ollama

Paused

File size: 5,555 Bytes

e8c5d46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21759c1
e8c5d46
21759c1
e8c5d46
 
 
 
 
21759c1
e8c5d46
616db8e
ed40101
e8c5d46
 
 
 
 
 
 
 
 
 
 
21759c1
e8c5d46
21759c1
e8c5d46
 
 
 
 
 
 
 
 
 
 
21759c1
e8c5d46
5c21b0d
e8c5d46
 
 
616db8e
e8c5d46
 
 
 
616db8e
e8c5d46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c6bb0a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
faa03b1
e8c5d46
6ace0b8
aef7a8e
faa03b1
6ace0b8
 
e8c5d46
 
 
 
6ace0b8
c6bb0a1
 
e8c5d46
 
32db087
 
6ace0b8
 
8534784
 
c6bb0a1
faa03b1
 
7c0fa7a
faa03b1
7c0fa7a
faa03b1
 
 
 
aef7a8e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
faa03b1

# import os
# import logging
# import requests
# from fastapi import FastAPI, HTTPException
# from fastapi.responses import StreamingResponse
# from pydantic import BaseModel
# from openai import OpenAI

# logging.basicConfig(level=logging.INFO)
# logger = logging.getLogger(__name__)

# app = FastAPI()
# MODEL_NAME = 'llama3.1:8b'

# OLLAMA_URL = "http://localhost:11434"


# # def create_model(query):
# #     message = [
# #         {"role": "system", "content": "You are a general chat bot."},
# #         {"role": "user", "content": f"{query}"}
# #     ]

# #     completion = ollama_client.chat.completions.create(
# #         model="llama3.1:8b",
# #         messages=message
# #         # response_format=base_model,
# #         #temperature = 0.1
# #     )
# #     return completion

# class Question(BaseModel):
#     text: str

# @app.get("/")
# def read_root():
#     return {"Hello": f"Welcome to {MODEL_NAME} FastAPI"}


# # # POST endpoint to query the LLM
# # @app.post("/ask")
# # async def ask_question(question: Question):
# #     try:
# #         response = create_model(question.text)
# #         return {"response": response}
# #     except Exception as e:
# #         raise HTTPException(status_code=500, detail=f"Error querying the model: {str(e)}")


# @app.get("/list_models")
# async def list_models():
#     """List all available models in Ollama."""
#     try:
#         response = requests.get(f"{OLLAMA_URL}/api/tags")
#     except Exception as e:
#         return {"error": str(e)}

#     return response.json()

# @app.post("/pull_model")
# async def pull_model(model_name: str):
#     """Pull a model from Ollama's repository."""
#     response = requests.post(f"{OLLAMA_URL}/api/pull", json={"name": model_name})
#     # print(response)

#     return response.json()


# @app.post("/generate")
# async def generate_text(model: str, prompt: str, system: str = "You are a helpful AI assistant.", stream: bool = False):
#     """Generate text from a given prompt using a specific model."""
#     try:
#         response =  requests.post(
#             f"{OLLAMA_URL}/api/generate",
#             json={"model": model, "prompt": prompt, "system": system, "stream": stream}
#         )
#     except Exception as e:
#         return {"error": str(e)}
#     # print(response)

#     return response.json()

# @app.post("/embed")
# async def get_embedding(model: str, text: str):
#     """Generate embeddings for the given text using a model."""
#     try:
#         response = requests.post(
#             f"{OLLAMA_URL}/api/embeddings",
#             json={"model": model, "prompt": text}
#         )
#     except Exception as e:
#         return {"error": str(e)}
#     # print(response)

#     return response.json()

# @app.post("/chat")
# async def chat(model: str, message: str, system: str = "You are a helpful chatbot."):
#     """Chat with the model while maintaining context."""
#     try:
#         response = requests.post(
#             f"{OLLAMA_URL}/api/chat",
#             json={"model": model, "messages": [{"role": "system", "content": system}, {"role": "user", "content": message}]}
#         )
#     except Exception as e:
#         return {"error": str(e)}
#     # print(response)

#     return response.json()



# @app.on_event("startup")
# async def startup_event():
#     logger.info(f"Starting up with model: {MODEL_NAME}")

# @app.on_event("shutdown")
# async def shutdown_event():
#     logger.info("Shutting down")



# from fastapi import FastAPI
# import httpx

# app = FastAPI()
# TARGET_SERVER = "http://localhost:11434"

# @app.get("/proxy/{path:path}")
# async def get_proxy_request(path: str):
#     async with httpx.AsyncClient() as client:
#         response = await client.get(f"{TARGET_SERVER}/{path}")
#     return response.json()

# @app.post("/proxy/{path:path}")
# async def post_proxy_request(path: str):
#     async with httpx.AsyncClient() as client:
#         response = await client.post(f"{TARGET_SERVER}/{path}")
#     return response.json()


from fastapi import FastAPI
import httpx
from pydantic import BaseModel
from OpenAIClasses import ChatCompletionRequest

class RequestBody(BaseModel):
    """Envelope accepted by the POST proxy endpoint.

    The JSON object to forward is wrapped under a single ``data`` key;
    ``proxy_post`` unwraps it and sends it upstream as the JSON body.
    """

    # Only required to be a dict; this model places no constraint on its
    # keys or values.
    data: dict

# FastAPI application instance; the route decorators below register on it.
app = FastAPI()
# Base URL every proxied request is forwarded to. The earlier, commented-out
# version of this file used the same address under the name OLLAMA_URL.
TARGET_SERVER = "http://localhost:11434"

@app.get("/proxy/{path:path}")
async def proxy_get(path: str):
    """Forward a GET request to the target server and relay its JSON body.

    Args:
        path: Everything after ``/proxy/`` in the incoming URL; appended
            verbatim to ``TARGET_SERVER``. Query parameters of the incoming
            request are not forwarded.

    Returns:
        The upstream response decoded from JSON, or ``{"error": <message>}``
        if the upstream call fails or its body is not valid JSON (the same
        error shape ``proxy_post`` returns).
    """
    try:
        async with httpx.AsyncClient() as client:
            # Use the same 120 s budget as proxy_post rather than httpx's
            # much shorter default, so slow upstream calls are not cut off.
            response = await client.get(f"{TARGET_SERVER}/{path}", timeout=120)
        return response.json()
    except Exception as exc:
        # Request-boundary handler: report the failure in the error shape the
        # other endpoints use instead of surfacing an opaque 500.
        print(exc)
        return {"error": str(exc)}

@app.post("/proxy/{path:path}")
async def proxy_post(path: str, request_body: RequestBody):
    """Relay a POST to the target server, sending ``data`` as the JSON body.

    Args:
        path: Everything after ``/proxy/`` in the incoming URL; appended
            verbatim to ``TARGET_SERVER``.
        request_body: Envelope whose ``data`` dict is forwarded upstream.

    Returns:
        The upstream response decoded from JSON, or ``{"error": <message>}``
        if anything fails while building, sending or decoding the request.
    """
    upstream_url = f"{TARGET_SERVER}/{path}"
    try:
        payload = request_body.model_dump()["data"]
        async with httpx.AsyncClient() as http:
            upstream = await http.post(upstream_url, json=payload, timeout=120)
        return upstream.json()
    except Exception as exc:
        # Failures are printed and reported to the caller as an error dict.
        print(exc)
        return {"error": str(exc)}


@app.post("/openai_compatible/chat/completions")
async def openai_compatible(request_body: ChatCompletionRequest):
    """Forward a chat-completion request to the target server's ``/api/chat``.

    Args:
        request_body: Parsed request; dumped to a dict and sent upstream as
            the JSON body without further translation.

    Returns:
        ``{"choices": [<upstream JSON>]}`` on success, or
        ``{"error": <message>}`` if the upstream call or JSON decoding fails.
    """
    try:
        data = request_body.model_dump()

        async with httpx.AsyncClient() as client:
            response = await client.post(f"{TARGET_SERVER}/api/chat", json=data, timeout=120)
        # Decode the body once and reuse it; each response.json() call
        # re-parses the payload.
        # NOTE(review): if the upstream replies with a stream of several JSON
        # documents (e.g. when the request asks for streaming), decoding will
        # fail and the error branch below is taken. Confirm how
        # ChatCompletionRequest handles streaming.
        completion = response.json()
        print(completion)
        return {"choices": [completion]}
    except Exception as e:
        # Failures are printed and reported to the caller as an error dict.
        print(e)
        return {"error": str(e)}