Update app.py
app.py
CHANGED
@@ -1,20 +1,24 @@
-from fastapi import FastAPI
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch
-
-
-
-
-
-
-
-
-
-
-
-@app.
-def
-
-
-
-
+from fastapi import FastAPI
+import os
+
+# Set a writable cache location before transformers is imported
+os.environ["HF_HOME"] = "./hf_cache"
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+
+app = FastAPI()
+
+# Load model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-1B-Instruct", cache_dir="./hf_cache")
+model = AutoModelForCausalLM.from_pretrained("unsloth/Llama-3.2-1B-Instruct", cache_dir="./hf_cache").to("cpu")
+
+@app.get("/")
+def home():
+    return {"message": "FastAPI running with Llama-3.2-1B-Instruct"}
+
+@app.post("/generate")
+def generate_text(prompt: str):
+    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")  # use the CPU
+    output = model.generate(**inputs, max_length=200)
+    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
+    return {"generated_text": generated_text}