from fastapi import FastAPI, HTTPException
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

import transformers
import torch

app = FastAPI()
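
# Alternative backend (currently disabled): download a GGUF build of Mistral 7B
# from the Hugging Face Hub and serve it with llama-cpp-python instead of
# transformers.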

"""
model_path = hf_hub_download(
        repo_id="TheBloke/Mistral-7B-v0.1-GGUF",
        filename="mistral-7b-v0.1.Q4_K_M.gguf")

llm = Llama(
    model_path=model_path,
    n_ctx=2048,
    n_threads=2
) 
"""

@app.get("/")
async def generate_text():
    try:
        """
        output = llm(
            "Q: Name the planets in the solar system? A: ",
            max_tokens=32,
            stop=["Q:", "\n"],
            echo=True)
        output = llm.create_chat_completion(
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant that outputs in JSON.",
                },
                {"role": "user", "content": "Who won the world series in 2020?"},
            ],
            response_format={"type": "json_object"},
            temperature=0.7,
        )
        """
        # Build a text-generation pipeline and run a single prompt through it.
        # Note: constructing the pipeline here reloads the model on every
        # request; in practice it would be created once at startup.
        model_id = "meta-llama/Meta-Llama-3-8B"
        pipe = transformers.pipeline(
            "text-generation",
            model=model_id,
            model_kwargs={"torch_dtype": torch.bfloat16},
            device_map="auto",
        )
        return pipe("Hey how are you doing today?")
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
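
# A minimal sketch of how this server could be installed and run locally.
# The module name "main" and the dependency list are assumptions, not pinned
# by this file:
#
#   pip install fastapi uvicorn transformers torch huggingface_hub llama-cpp-python
#   uvicorn main:app --host 0.0.0.0 --port 8000
#   curl http://localhost:8000/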