Kaan committed on
Commit
8c69006
·
verified ·
1 Parent(s): 148def9
Files changed (1) hide show
  1. app.py +2 -16
app.py CHANGED
@@ -1,24 +1,10 @@
1
  from fastapi import FastAPI
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
3
  from llama_cpp import Llama
 
4
 
5
 
6
- # Create an instance of the FastAPI class
7
- app = FastAPI()
8
-
9
- # Define a route for the root endpoint
10
- @app.get("/llm")
11
- async def read_root():
12
- llm = Llama.from_pretrained(
13
- repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
14
- filename="*q8_0.gguf",
15
- verbose=False)
16
- output = llm(
17
- "Q: Name the planets in the solar system? A: ", # Prompt
18
- max_tokens=32, # Generate up to 32 tokens, set to None to generate up to the end of the context window
19
- stop=["Q:", "\n"], # Stop generating just before the model would generate a new question
20
- echo=True )
21
- return {"message": output}
22
 
23
 
24
 
 
1
  from fastapi import FastAPI
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
3
  from llama_cpp import Llama
4
+ from huggingface_hub import hf_hub_download
5
 
6
 
7
+ hf_hub_download(repo_id="TheBloke/Mistral-7B-v0.1-GGUF", filename="mistral-7b-v0.1.Q4_K_M.gguf")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
 
10