ky32 committed
Commit 97f6c69 · verified · 1 Parent(s): f7a7a5a

Update app.py

Files changed (1): app.py (+18 −28)

app.py CHANGED
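
In short, this commit swaps the transformers-based loader for llama-cpp-python: instead of downloading a model directory and calling AutoModelForCausalLM.from_pretrained (which presumably failed because GGUF repos ship quantized .gguf files rather than the config.json and weight files from_pretrained expects, hence the old code's config.json existence check), the new code fetches a single GGUF file with hf_hub_download and loads it with llama_cpp.Llama.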
@@ -1,39 +1,29 @@
 from fastapi import FastAPI, HTTPException
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from huggingface_hub import hf_hub_download
-from huggingface_hub import snapshot_download
 import os
+import copy
+import time
+import llama_cpp
+from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
 
 app = FastAPI()
 
+llm = Llama(
+    model_path=hf_hub_download(
+        repo_id="TheBloke/Llama-2-7b-Chat-GGUF",
+        filename="mistral-7b-v0.1.Q4_K_M.gguf"),
+    n_ctx=2048,
+)
+
 @app.get("/")
 async def generate_text():
     try:
-        # Get the cache directory from the environment variable
-        cache_dir = os.getenv("HF_HOME")
-
-        # Specify the directory for model download within the Docker container
-        model_dir = os.path.join(cache_dir, "TheBloke/Mistral-7B-v0.1-GGUF")
-        os.makedirs(model_dir, exist_ok=True)
-
-        #hf_hub_download(repo_id="TheBloke/Mistral-7B-v0.1-GGUF", filename="mistral-7b-v0.1.Q4_K_M.gguf", local_dir=model_dir)
-        #hf_hub_download(repo_id="TheBloke/Mistral-7B-v0.1-GGUF", filename="config.json", local_dir=model_dir)
-        #snapshot_download(repo_id="TheBloke/Mistral-7B-v0.1-GGUF", local_dir=model_dir)
-
-        # Check if config.json file exists in the model directory
-        config_file = os.path.join(model_dir, "config.json")
-        if not os.path.exists(config_file):
-            raise ValueError("config.json file is missing in the model directory")
-
-        # Load tokenizer and model
-        tokenizer = AutoTokenizer.from_pretrained(model_dir)
-        model = AutoModelForCausalLM.from_pretrained(model_dir)
-
-        # Generate text
-        prompt = "Once upon a time, there was a"
-        inputs = tokenizer(prompt, return_tensors="pt")
-        output = model.generate(input_ids=inputs["input_ids"], max_length=50, num_return_sequences=3, temperature=0.7)
-        generated_texts = tokenizer.batch_decode(output, skip_special_tokens=True)
-        return generated_texts
+        output = llm(
+            "Q: Name the planets in the solar system? A: ",
+            max_tokens=32,
+            stop=["Q:", "\n"],
+            echo=True)
+        return output
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
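
Two things in the committed version are worth flagging. First, the repo_id/filename pair is mismatched: TheBloke/Llama-2-7b-Chat-GGUF does not contain mistral-7b-v0.1.Q4_K_M.gguf (that file lives in TheBloke/Mistral-7B-v0.1-GGUF, the repo the old code targeted), so hf_hub_download should fail with a 404 at startup. Second, the copy, time, llama_cpp, and transformers imports are unused. A minimal cleaned-up sketch, assuming the intent was to keep serving the Mistral quantization named in filename (this is a hypothetical revision, not part of the commit):

# Hypothetical cleanup of the committed app.py, not part of this commit.
# Assumes the intended model is the Mistral GGUF named in `filename`;
# the committed repo_id/filename pair points at two different models.
from fastapi import FastAPI, HTTPException
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

app = FastAPI()

# Download the quantized model once at startup and load it with llama.cpp.
llm = Llama(
    model_path=hf_hub_download(
        repo_id="TheBloke/Mistral-7B-v0.1-GGUF",  # repo that actually ships this file
        filename="mistral-7b-v0.1.Q4_K_M.gguf",
    ),
    n_ctx=2048,  # context window in tokens
)

@app.get("/")
def generate_text():
    try:
        # llama-cpp-python's __call__ runs a blocking completion; declaring the
        # endpoint as a plain `def` lets FastAPI run it in its worker threadpool
        # instead of stalling the event loop, which an `async def` would do here.
        output = llm(
            "Q: Name the planets in the solar system? A: ",
            max_tokens=32,
            stop=["Q:", "\n"],  # stop before the model invents the next question
            echo=True,          # include the prompt in the returned text
        )
        return output
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))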
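
Once the container is up, the endpoint can be smoke-tested from Python. llama-cpp-python returns an OpenAI-style completion dict, so the generated text sits under choices[0]["text"]; the host and port below are assumptions (use whatever address uvicorn binds in the Dockerfile):

# Hypothetical client-side check; localhost:8000 is an assumed uvicorn address.
import requests

resp = requests.get("http://localhost:8000/", timeout=120)
resp.raise_for_status()
completion = resp.json()
print(completion["choices"][0]["text"])  # prompt plus generated answer (echo=True)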