# NOTE(review): removed non-Python scrape residue (file-size header, commit
# hashes, and a copied line-number gutter) that preceded the real source.
from fastapi import FastAPI, HTTPException
from transformers import AutoModelForCausalLM, AutoTokenizer
import os
import copy
import time
import llama_cpp
from llama_cpp import Llama
from huggingface_hub import hf_hub_download  

app = FastAPI()

model_path = hf_hub_download(
        repo_id="TheBloke/Mistral-7B-v0.1-GGUF",
        filename="mistral-7b-v0.1.Q4_K_M.gguf")

llm = Llama(
    model_path=model_path,
    n_ctx=2048,
    n_threads=2
) 


@app.get("/")
def generate_text(prompt: str = "Q: Name the planets in the solar system? A: "):
    """Run one completion on the shared llama.cpp model and return it.

    Parameters
    ----------
    prompt : str, optional
        Text for the model to complete. Defaults to the original
        hard-coded question, so existing callers of ``GET /`` see
        unchanged behavior.

    Returns
    -------
    dict
        The raw llama-cpp-python completion payload (``choices``,
        ``usage``, ...), serialized by FastAPI.

    Raises
    ------
    HTTPException
        500 with the underlying error message if generation fails.
    """
    try:
        # llm() is a blocking, CPU-bound call. This endpoint is deliberately
        # a plain `def` (not `async def`) so FastAPI runs it in its worker
        # threadpool instead of stalling the event loop for the whole
        # generation.
        output = llm(
            prompt,
            max_tokens=32,
            stop=["Q:", "\n"],  # stop before the model invents a new "Q:"
            echo=True,          # include the prompt in the returned text
        )
        return output
    except Exception as e:
        # Boundary handler: surface any generation failure as HTTP 500,
        # preserving the original cause for server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e