import os

# One-time model preparation. llama.cpp's converter needs the full Hugging Face
# checkpoint directory (config, tokenizer, and safetensors weights), not a bare
# state_dict, so we snapshot the repo locally and convert the whole directory.
os.system("""
# 1. Download the full safetensors checkpoint (weights + config + tokenizer)
python -c "
from huggingface_hub import snapshot_download
snapshot_download('deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct', local_dir='/app/hf_model')
"
# 2. Convert to GGUF format (requires llama.cpp)
git clone https://github.com/ggerganov/llama.cpp
cd llama.cpp
python convert_hf_to_gguf.py /app/hf_model --outtype f16 --outfile /app/model-f16.gguf
""")
from flask import Flask, request, Response
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

app = Flask(__name__)
# Minimal HTML page served at the root route
HTML_CONTENT = '''<!DOCTYPE html>
<html>
<head><meta charset="utf-8"><title>AI Chat Interface</title></head>
<body>
<h1>AI Chat Interface</h1>
<p>Hello! How can I help you?</p>
</body>
</html>'''
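# A full chat UI would attach an EventSource to the /chat endpoint below to
# render the SSE stream; this static placeholder only confirms the server is up.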
def download_model():
    model_name = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
    # Placeholder: replace with a repo/filename that actually hosts a GGUF build
    model_file = "deepseek-coder-v2-lite-instruct.Q6_K.gguf"
    return hf_hub_download(model_name, filename=model_file)
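# hf_hub_download resolves to a path inside the local Hugging Face cache
# (~/.cache/huggingface/hub by default), so restarts reuse the downloaded file.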
def initialize_model(model_path):
    return Llama(
        model_path=model_path,
        n_ctx=4096,       # context window in tokens
        n_threads=4,      # CPU threads used for generation
        n_gpu_layers=-1,  # -1 offloads all layers to GPU when one is available
    )
# llama.cpp only loads GGUF files, so fetch the prebuilt GGUF from the Hub
# (or point model_path at the locally converted file from the step above)
model_path = download_model()
llm = initialize_model(model_path)
system_prompt = (
"You are a helpful AI coding assistant. Your mission is to help people with programming "
"and technical questions, providing clear and concise answers."
)
chat_history = [{"role": "system", "content": system_prompt}]
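# Note: the prompt built below is a plain "Role: content" transcript rather than
# the model's native chat template. llama-cpp-python's create_chat_completion()
# could apply the template embedded in the GGUF metadata instead; the plain
# format is kept here because it makes the streaming loop easy to follow.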
@app.route('/')
def index():
    return HTML_CONTENT
@app.route('/chat')
def chat():
    user_message = request.args.get('message', '')
    chat_history.append({"role": "user", "content": user_message})
    # Capitalize roles so the transcript matches the "User:" stop sequence below
    full_prompt = "\n".join(f"{msg['role'].capitalize()}: {msg['content']}" for msg in chat_history)
    full_prompt += "\nAssistant:"
    def generate():
        # The reassignment when trimming below makes chat_history local to this
        # closure unless it is declared global (otherwise: UnboundLocalError)
        global chat_history
        ai_response = ""
        for token in llm(full_prompt, max_tokens=1000, stop=["User:"], stream=True):
            chunk = token['choices'][0]['text']
            if chunk:
                ai_response += chunk
                yield f"data: {chunk}\n\n"
        chat_history.append({"role": "assistant", "content": ai_response.strip()})
        if len(chat_history) > 10:
            # Keep the system prompt plus the 9 most recent messages
            chat_history = [chat_history[0]] + chat_history[-9:]
        yield "data: [DONE]\n\n"
    return Response(generate(), content_type='text/event-stream')
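# Usage example (assuming a local run): stream a reply over SSE with curl, where
# -N disables output buffering so tokens appear as they arrive:
#   curl -N "http://localhost:7860/chat?message=hello"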
if __name__ == '__main__':
    # debug=False: the Flask debug reloader imports the module twice, which would
    # rerun the conversion step and load the model a second time
    app.run(debug=False, port=7860, host="0.0.0.0")