import os

# One-time model conversion at startup: export the Hugging Face checkpoint,
# then convert it to GGUF with llama.cpp. Note: the converter needs a full
# model directory (weights + tokenizer files), not a bare state_dict, and
# convert.py was replaced by convert_hf_to_gguf.py in recent llama.cpp.
os.system("""
# 1. Export the safetensors checkpoint as a local Hugging Face model directory
python -c "
from transformers import AutoModelForCausalLM, AutoTokenizer
model = AutoModelForCausalLM.from_pretrained('deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct')
model.save_pretrained('/app/hf_model')
AutoTokenizer.from_pretrained('deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct').save_pretrained('/app/hf_model')
"
# 2. Convert to GGUF format (requires llama.cpp)
git clone https://github.com/ggerganov/llama.cpp
python llama.cpp/convert_hf_to_gguf.py /app/hf_model --outtype f16 --outfile /app/model-f16.gguf
""")

from flask import Flask, request, Response
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

app = Flask(__name__)

# Minimal chat page as a string: streams tokens from /chat via Server-Sent Events.
HTML_CONTENT = '''
<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <title>AI Chat Interface</title>
</head>
<body>
  <h1>AI Chat Interface</h1>
  <div id="chat">
    <div class="message"><b>AI:</b> Hello! How can I help you?</div>
  </div>
  <input id="input" type="text" placeholder="Type a message...">
  <button onclick="send()">Send</button>
  <script>
    function appendMessage(role, text) {
      const div = document.createElement('div');
      div.className = 'message';
      div.innerHTML = '<b>' + role + ':</b> <span></span>';
      div.lastChild.textContent = text;
      document.getElementById('chat').appendChild(div);
      return div.lastChild;
    }
    function send() {
      const input = document.getElementById('input');
      const message = input.value.trim();
      if (!message) return;
      appendMessage('You', message);
      input.value = '';
      const reply = appendMessage('AI', '');
      // Stream the assistant's reply token by token.
      const source = new EventSource('/chat?message=' + encodeURIComponent(message));
      source.onmessage = (event) => {
        if (event.data === '[DONE]') { source.close(); return; }
        reply.textContent += event.data;
      };
      source.onerror = () => source.close();
    }
  </script>
</body>
</html>
'''
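
# Request flow: GET / serves the page above; each user turn issues
# GET /chat?message=..., and the reply streams back as Server-Sent Events
# ("data: <token>" lines, terminated by "data: [DONE]").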
def download_model():
    # Alternative to the local conversion above: download a prebuilt GGUF from
    # the Hub. Change model_file to the appropriate GGUF filename.
    model_name = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
    model_file = "deepseek-coder-v2-lite-instruct.Q6_K.gguf"
    return hf_hub_download(model_name, filename=model_file)


def initialize_model(model_path):
    return Llama(
        model_path=model_path,
        n_ctx=4096,
        n_threads=4,
        n_gpu_layers=-1  # Offload all layers to the GPU if one is available
    )


# Load the GGUF produced by the conversion step above; llama.cpp cannot read a
# raw PyTorch checkpoint. (Or use download_model() to fetch a prebuilt GGUF.)
model_path = "/app/model-f16.gguf"
llm = initialize_model(model_path)

system_prompt = (
    "You are a helpful AI coding assistant. Your mission is to help people with programming "
    "and technical questions, providing clear and concise answers."
)
chat_history = [{"role": "system", "content": system_prompt}]


@app.route('/')
def index():
    return HTML_CONTENT


@app.route('/chat')
def chat():
    global chat_history
    user_message = request.args.get('message', '')
    chat_history.append({"role": "user", "content": user_message})
    # Capitalize roles so the "User:" stop sequence below matches the prompt format.
    full_prompt = "\n".join(f"{msg['role'].capitalize()}: {msg['content']}" for msg in chat_history)
    full_prompt += "\nAssistant:"

    def generate():
        global chat_history  # reassigned when the history is trimmed below
        ai_response = ""
        for token in llm(full_prompt, max_tokens=1000, stop=["User:"], stream=True):
            chunk = token['choices'][0]['text']
            if chunk:
                ai_response += chunk
                # A chunk may contain newlines; each line needs its own "data:"
                # field, and EventSource rejoins them with "\n" on the client.
                yield "\n".join(f"data: {line}" for line in chunk.split("\n")) + "\n\n"
        chat_history.append({"role": "assistant", "content": ai_response.strip()})
        if len(chat_history) > 10:
            # Limit history to the last messages, but always keep the system prompt.
            chat_history = [chat_history[0]] + chat_history[-9:]
        yield "data: [DONE]\n\n"

    return Response(generate(), content_type='text/event-stream')


if __name__ == '__main__':
    app.run(debug=True, port=7860, host="0.0.0.0")
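
# Usage sketch (assumes the server is running locally on port 7860): the /chat
# endpoint can also be exercised directly with curl, without the HTML page:
#
#   curl -N "http://localhost:7860/chat?message=Write%20hello%20world%20in%20Python"
#
# Tokens arrive one per SSE event as "data: <token>" lines; the stream ends
# with "data: [DONE]".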