ky32 committed
Commit 65b6e4c · verified · 1 Parent(s): 7ab5612

Update app.py

Files changed (1): app.py (+11 -3)
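Before this commit, app.py downloaded a GGUF build of Mistral-7B and served it through llama-cpp-python. As a rough reconstruction from the unchanged context lines in the diff below (the llm() prompt and the chat message contents never appear in the hunks, so those values are hypothetical placeholders):

# Reconstructed pre-commit shape; the message content is a placeholder, and
# the model_path argument to Llama() is assumed since the hunk starts at n_ctx.
from fastapi import FastAPI, HTTPException
import llama_cpp
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

app = FastAPI()

# Fetch the 4-bit GGUF weights from the Hub once at startup.
model_path = hf_hub_download(
    repo_id="TheBloke/Mistral-7B-v0.1-GGUF",
    filename="mistral-7b-v0.1.Q4_K_M.gguf")

llm = Llama(
    model_path=model_path,  # assumption: not visible in the hunk
    n_ctx=2048,
    n_threads=2
)

@app.get("/")
async def generate_text():
    try:
        output = llm.create_chat_completion(
            messages=[
                {"role": "user", "content": "Hello"},  # placeholder
            ],
            temperature=0.7,
        )
        return output
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))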
app.py CHANGED
@@ -7,8 +7,13 @@ import llama_cpp
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 
+import transformers
+import torch
+
+
 app = FastAPI()
 
+"""
 model_path = hf_hub_download(
     repo_id="TheBloke/Mistral-7B-v0.1-GGUF",
     filename="mistral-7b-v0.1.Q4_K_M.gguf")
@@ -18,7 +23,7 @@ llm = Llama(
     n_ctx=2048,
     n_threads=2
 )
-
+"""
 
 @app.get("/")
 async def generate_text():
@@ -29,7 +34,6 @@ async def generate_text():
             max_tokens=32,
             stop=["Q:", "\n"],
             echo=True)
-        """
         output = llm.create_chat_completion(
             messages=[
                 {
@@ -43,6 +47,10 @@ async def generate_text():
                 },
             temperature=0.7,
         )
-        return output
+        """
+        model_id = "meta-llama/Meta-Llama-3-8B"
+        pipeline = transformers.pipeline("text-generation", model=model_id, model_kwargs={"torch_dtype": torch.bfloat16}, device_map="auto")
+        pipeline("Hey how are you doing today?")
+        return pipeline("Hey how are you doing today?")
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
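Assembled from the added and context lines above, a minimal sketch of how app.py plausibly reads after this commit. The file's first six lines fall outside the hunks, so the FastAPI imports below are inferred from usage rather than shown in the diff, and the now-commented-out llama-cpp block is elided:

# Sketch of the post-commit file; imports are assumptions inferred from the
# FastAPI()/HTTPException usage. The triple-quoted string added in this commit
# now wraps the old hf_hub_download/Llama/create_chat_completion code (elided).
from fastapi import FastAPI, HTTPException
import transformers
import torch

app = FastAPI()

@app.get("/")
async def generate_text():
    try:
        model_id = "meta-llama/Meta-Llama-3-8B"
        # The pipeline is built inside the request handler, so the 8B model
        # is loaded on every call to "/".
        pipeline = transformers.pipeline(
            "text-generation",
            model=model_id,
            model_kwargs={"torch_dtype": torch.bfloat16},
            device_map="auto",
        )
        # The commit runs the prompt twice and discards the first result.
        pipeline("Hey how are you doing today?")
        return pipeline("Hey how are you doing today?")
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

Hoisting the pipeline construction to module level (as the commented-out Llama setup was) and returning a single call would avoid reloading the model and generating twice per request.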
 
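To exercise the endpoint, the Space would need to serve the app (for example with uvicorn app:app --host 0.0.0.0 --port 7860, the conventional Spaces port) and the runtime's Hugging Face token would need access to the gated meta-llama/Meta-Llama-3-8B repo; both details are assumptions, not part of the commit. A hypothetical client call:

# Hypothetical client; host and port are assumptions, not part of the commit.
import requests

resp = requests.get("http://localhost:7860/", timeout=600)
resp.raise_for_status()
# A transformers text-generation pipeline returns a list of dicts, so the JSON
# body should resemble: [{"generated_text": "Hey how are you doing today? ..."}]
print(resp.json())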