from fastapi import FastAPI
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

app = FastAPI()

# Fetch the quantized GGUF weights once at import time; hf_hub_download
# caches the file locally, so repeated startups skip the download.
model_path = hf_hub_download(
    repo_id="TheBloke/Mistral-7B-v0.1-GGUF",
    filename="mistral-7b-v0.1.Q4_K_M.gguf",
)

# GGUF files are llama.cpp checkpoints; they cannot be loaded with
# transformers' AutoTokenizer/AutoModelForCausalLM, so load them with
# llama-cpp-python instead.
llm = Llama(model_path=model_path)

@app.get("/")
async def generate_text():
    prompt = "Once upon a time, there was a"
    generated_texts = []
    # Draw three independent completions, sampling at temperature 0.7.
    for i in range(3):
        output = llm(prompt, max_tokens=50, temperature=0.7)
        text = output["choices"][0]["text"]
        print(f"Generated Text {i + 1}: {text}")
        generated_texts.append(text)
    return generated_texts
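
# Usage sketch (assumes the file above is saved as app.py): serve it with
# uvicorn, then hit the endpoint from any HTTP client, for example:
#
#   uvicorn app:app --host 0.0.0.0 --port 8000
#
#   import requests
#   for text in requests.get("http://localhost:8000/").json():
#       print(text)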