from flask import Flask, request, jsonify
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

app = Flask(__name__)

# Load the model from Hugging Face
MODEL_NAME = "fatmata/psybot"  # Replace with the actual name of your model
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# float16 halves memory use; this assumes a GPU (keep the default dtype on CPU)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16)


@app.route("/chat", methods=["POST"])
def chat():
    # get_json(silent=True) returns None instead of raising on a bad or
    # missing JSON body, so we can answer with our own 400 error
    data = request.get_json(silent=True) or {}
    user_input = data.get("message", "")
    if not user_input:
        return jsonify({"error": "Empty message"}), 400

    # Generate the response
    prompt = f"<|startoftext|><|user|> {user_input} <|bot|>"
    inputs = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
    with torch.no_grad():
        output = model.generate(
            inputs,
            max_new_tokens=100,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens. Splitting the full decoded text
    # on "<|bot|>" is fragile: if the marker is a registered special token,
    # skip_special_tokens=True strips it and the prompt leaks into the reply.
    response = tokenizer.decode(
        output[0][inputs.shape[-1]:], skip_special_tokens=True
    ).strip()
    return jsonify({"response": response})


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)
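
# Example request, assuming the server is running locally on port 7860
# (the message text below is just a placeholder):
#
#   curl -X POST http://localhost:7860/chat \
#        -H "Content-Type: application/json" \
#        -d '{"message": "I have been feeling anxious lately"}'
#
# The endpoint should answer with JSON of the form:
#
#   {"response": "<generated text>"}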