import os

# Cache locations must be set before transformers is imported, because the
# library resolves these environment variables at import time. /tmp is used
# because it is writable even in read-only container filesystems.
os.environ["TRANSFORMERS_CACHE"] = "/tmp/transformers_cache"  # deprecated in favor of HF_HOME; kept for older transformers versions
os.environ["HF_HOME"] = "/tmp/hf_home"
os.environ["XDG_CACHE_HOME"] = "/tmp/cache"

# Create the cache directories if they don't exist
for path in ("/tmp/transformers_cache", "/tmp/hf_home", "/tmp/cache"):
    os.makedirs(path, exist_ok=True)

from fastapi import FastAPI
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the model and tokenizer with an explicit cache directory;
# cache_dir takes precedence over the environment variables above.
model_name = "mynuddin/chatbot"
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir="/tmp/model_cache")
model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir="/tmp/model_cache").to("cpu")

app = FastAPI()


@app.post("/generate")
def generate_text(prompt: str):
    # Tokenize the prompt and generate up to 128 tokens total (prompt included);
    # use max_new_tokens instead if you want to bound only the completion.
    inputs = tokenizer(prompt, return_tensors="pt")
    output = model.generate(**inputs, max_length=128)
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return {"generated_query": generated_text}
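
if __name__ == "__main__":
    # A minimal local smoke test, a sketch assuming this file is run directly
    # and httpx is installed (required by fastapi.testclient). In production,
    # serve the app with uvicorn instead, e.g.:
    #   uvicorn app:app --host 0.0.0.0 --port 8000
    from fastapi.testclient import TestClient

    client = TestClient(app)
    # Because `prompt` is declared as a bare str parameter on a POST route,
    # FastAPI reads it from the query string, not from the JSON body.
    resp = client.post("/generate", params={"prompt": "Hello"})
    print(resp.json())  # {"generated_query": "..."}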