Petro
committed on
Commit
·
b245107
1
Parent(s):
af9a291
- Dockerfile +1 -2
- main.py +13 -1
- requirements.txt +2 -1
Dockerfile
CHANGED
@@ -1,13 +1,12 @@
|
|
1 |
FROM python:3.9
|
2 |
|
3 |
-
RUN wget -q zephyr-7b-beta.Q4_K_S.gguf https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q4_K_S.gguf
|
4 |
|
5 |
COPY requirements.txt ./requirements.txt
|
6 |
|
7 |
RUN python -m pip install -U pip && \
|
8 |
python -m pip install -r requirements.txt && \
|
9 |
python -m pip cache purge
|
10 |
-
#RUN wget -O /code/zephyr-7b-beta.Q4_K_S.gguf https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q4_K_S.gguf
|
11 |
|
12 |
COPY ./main.py /code/main.py
|
13 |
WORKDIR /code
|
|
|
1 |
FROM python:3.9
|
2 |
|
3 |
+
#RUN wget -q zephyr-7b-beta.Q4_K_S.gguf https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q4_K_S.gguf
|
4 |
|
5 |
COPY requirements.txt ./requirements.txt
|
6 |
|
7 |
RUN python -m pip install -U pip && \
|
8 |
python -m pip install -r requirements.txt && \
|
9 |
python -m pip cache purge
|
|
|
10 |
|
11 |
COPY ./main.py /code/main.py
|
12 |
WORKDIR /code
|
main.py
CHANGED
@@ -1,7 +1,9 @@
|
|
|
|
|
|
1 |
from ctransformers import AutoModelForCausalLM
|
2 |
from fastapi import FastAPI
|
3 |
from pydantic import BaseModel
|
4 |
-
|
5 |
#
|
6 |
# llm = AutoModelForCausalLM.from_pretrained("zephyr-7b-beta.Q4_K_S.gguf",
|
7 |
# model_type='mistral',
|
@@ -15,6 +17,16 @@ class validation(BaseModel):
|
|
15 |
#Fast API
|
16 |
|
17 |
app = FastAPI()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
@app.post("/llm_on_cpu")
|
20 |
async def stream(item: validation):
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
from ctransformers import AutoModelForCausalLM
|
4 |
from fastapi import FastAPI
|
5 |
from pydantic import BaseModel
|
6 |
+
import requests
|
7 |
#
|
8 |
# llm = AutoModelForCausalLM.from_pretrained("zephyr-7b-beta.Q4_K_S.gguf",
|
9 |
# model_type='mistral',
|
|
|
17 |
#Fast API
|
18 |
|
19 |
app = FastAPI()
|
20 |
+
file_name = "zephyr-7b-beta.Q4_K_S.gguf"
|
21 |
+
|
22 |
+
if not os.path.exists(file_name):
|
23 |
+
print("Downloading model...")
|
24 |
+
url = "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q4_K_S.gguf"
|
25 |
+
response = requests.get(url)
|
26 |
+
print(response.status_code)
|
27 |
+
with open(file_name, 'wb') as file:
|
28 |
+
file.write(response.content)
|
29 |
+
|
30 |
|
31 |
@app.post("/llm_on_cpu")
|
32 |
async def stream(item: validation):
|
requirements.txt
CHANGED
@@ -4,4 +4,5 @@ pydantic
|
|
4 |
uvicorn
|
5 |
requests
|
6 |
python-dotenv
|
7 |
-
ctransformers
|
|
|
|
4 |
uvicorn
|
5 |
requests
|
6 |
python-dotenv
|
7 |
+
ctransformers
|
8 |
+
requests
|