Alvin2707 committed on
Commit 384f319 (verified)
Parent(s): f410ee6

Update app.py

Files changed (1):
  app.py +24 -20
app.py CHANGED
@@ -1,20 +1,24 @@
- from fastapi import FastAPI
- from transformers import AutoTokenizer, AutoModelForCausalLM
- import torch
-
- app = FastAPI()
-
- # Load model and tokenizer
- tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-1B-Instruct")
- model = AutoModelForCausalLM.from_pretrained("unsloth/Llama-3.2-1B-Instruct").to("cpu")  # Use CPU: free HF Spaces only provide CPU
-
- @app.get("/")
- def home():
-     return {"message": "FastAPI running with Llama-3.2-1B-Instruct"}
-
- @app.post("/generate")
- def generate_text(prompt: str):
-     inputs = tokenizer(prompt, return_tensors="pt").to("cpu")  # Use CPU
-     output = model.generate(**inputs, max_length=200)
-     generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
-     return {"generated_text": generated_text}
+ from fastapi import FastAPI
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch
+ import os
+
+ app = FastAPI()
+
+ # Set an accessible cache location
+ os.environ["HF_HOME"] = "./hf_cache"
+
+ # Load model and tokenizer
+ tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-1B-Instruct", cache_dir="./hf_cache")
+ model = AutoModelForCausalLM.from_pretrained("unsloth/Llama-3.2-1B-Instruct", cache_dir="./hf_cache").to("cpu")
+
+ @app.get("/")
+ def home():
+     return {"message": "FastAPI running with Llama-3.2-1B-Instruct"}
+
+ @app.post("/generate")
+ def generate_text(prompt: str):
+     inputs = tokenizer(prompt, return_tensors="pt").to("cpu")  # Use CPU
+     output = model.generate(**inputs, max_length=200)
+     generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
+     return {"generated_text": generated_text}
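
For reference, a minimal client sketch exercising the two endpoints after this change; the base URL is an assumption (localhost with port 7860, the HF Spaces default), so swap in your own deployment URL. Because generate_text declares a bare prompt: str, FastAPI reads the prompt from the query string rather than the request body:

import requests

BASE_URL = "http://localhost:7860"  # assumed local/Spaces default; adjust for your deployment

# Health check against the root endpoint
print(requests.get(f"{BASE_URL}/").json())

# A bare `prompt: str` on a POST route is a query parameter in FastAPI,
# so it is sent via params=, not json=.
resp = requests.post(f"{BASE_URL}/generate", params={"prompt": "Hello, who are you?"})
print(resp.json()["generated_text"])

One caveat: huggingface_hub reads HF_HOME at import time, so setting os.environ["HF_HOME"] after the transformers import may have no effect; the explicit cache_dir= arguments are what actually relocate the cache in this commit.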