# h2oGPT server provisioning: configures nginx (reverse proxy), vLLM, and
# h2oGPT as systemd services on an Ubuntu host.
# Let the ubuntu user install nginx configs later: run_nginx.sh (run as ubuntu
# by systemd) writes the rendered config into /etc/nginx/conf.d without sudo.
# NOTE(review): recursive chown of a system dir is broad — confirm intended.
cd /etc/nginx/conf.d
sudo chown -R ubuntu:ubuntu .
cd "$HOME"
# Reverse-proxy template; <|_SUBST_PUBLIC_IP|> is substituted by run_nginx.sh.
# Quoted heredoc delimiter: $host, $http_upgrade etc. are written literally.
cat > temp.conf <<'EOF'
server {
listen 80;
listen [::]:80;
server_name <|_SUBST_PUBLIC_IP|>; # Change this to your domain name
location / { # Change this if you'd like to server your Gradio app on a different path
proxy_pass http://0.0.0.0:7860/; # Change this if your Gradio app will be running on a different port
proxy_redirect off;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
proxy_set_header Host $host;
}
}
EOF
# Generate the script that resolves this host's public IP and installs the
# rendered nginx config.  Quoted heredoc: $ip and $(dig ...) are written
# literally and only evaluated when run_nginx.sh itself runs.
cat > run_nginx.sh <<'EOF'
#!/bin/bash
ip=$(dig +short myip.opendns.com @resolver1.opendns.com)
sed "s/<|_SUBST_PUBLIC_IP|>;/$ip;/g" /home/ubuntu/temp.conf > /etc/nginx/conf.d/h2ogpt.conf
sudo systemctl restart nginx.service
EOF
# File was just created by this user, so plain chmod suffices (no sudo needed).
chmod u+x run_nginx.sh
# Install a systemd unit that (re)generates the nginx config on every boot.
cd /etc/systemd/system
# NOTE(review): chown of /etc/systemd/system to ubuntu lets this and later
# steps write unit files without sudo, but it is a broad permission change
# on a system directory — confirm this is intended.
sudo chown -R ubuntu:ubuntu .
cat > h2ogpt_nginx.service <<'EOF'
[Unit]
Description=h2oGPT Nginx Server
StartLimitIntervalSec=300
StartLimitBurst=5
After=network.target

[Service]
Type=simple
User=ubuntu
WorkingDirectory=/home/ubuntu
ExecStart=bash /home/ubuntu/run_nginx.sh
Restart=always
RestartSec=10

[Install]
WantedBy=multi-user.target
EOF
sudo systemctl daemon-reload
sudo systemctl enable h2ogpt_nginx.service
cd "$HOME"
# Generate the vLLM launcher.  The first half of the GPUs is given to vLLM;
# run_h2ogpt.sh uses the second half.  Quoted heredoc: nothing expands now —
# the GPU count is computed when the service actually starts.
cat > run_vllm.sh <<'EOF'
#!/bin/bash
# Tensor-parallel over half the visible GPUs (minimum 1).
ngpus=$(nvidia-smi -L | wc -l)
tps=$(( ngpus > 1 ? ngpus / 2 : 1 ))
# Devices 0..tps-1, i.e. the first half of the GPUs.
NCCL_IGNORE_DISABLED_P2P=1 \
CUDA_VISIBLE_DEVICES=$(seq -s, 0 $((tps - 1))) \
/home/ubuntu/vllm/bin/python3.10 -m vllm.entrypoints.openai.api_server \
    --port=5000 \
    --host=0.0.0.0 \
    --model h2oai/h2ogpt-4096-llama2-13b-chat \
    --tokenizer=hf-internal-testing/llama-tokenizer \
    --tensor-parallel-size=$tps --seed 1234
EOF
# File is user-owned; no sudo required to set the execute bit.
chmod u+x run_vllm.sh
# Install the vLLM systemd unit.
cd /etc/systemd/system
# NOTE(review): broad chown of a system directory — see note above; kept
# because subsequent steps rely on ubuntu owning this directory.
sudo chown -R ubuntu:ubuntu .
cat > vllm.service <<'EOF'
[Unit]
Description=vLLM Server
After=network.target

[Service]
Type=simple
User=ubuntu
WorkingDirectory=/home/ubuntu
ExecStart=bash /home/ubuntu/run_vllm.sh
Restart=always
# Match the nginx unit: back off 10s between restarts instead of the
# 100ms systemd default, which would hammer a crash-looping server.
RestartSec=10

[Install]
WantedBy=multi-user.target
EOF
sudo systemctl daemon-reload
sudo systemctl enable vllm.service
cd "$HOME/h2ogpt"
# Generate the h2oGPT launcher: it talks to the local vLLM server and takes
# the second half of the GPUs (run_vllm.sh uses the first half).
cat > run_h2ogpt.sh <<'EOF'
#!/bin/bash
ngpus=$(nvidia-smi -L | wc -l)
# Devices ngpus/2 .. ngpus-1 (with 1 GPU this is just "0", shared with vLLM).
CUDA_VISIBLE_DEVICES=$(seq -s, $((ngpus / 2)) $((ngpus - 1))) \
/home/ubuntu/h2ogpt/venv/bin/python3.10 /home/ubuntu/h2ogpt/generate.py \
    --inference_server="vllm:0.0.0.0:5000" \
    --base_model=h2oai/h2ogpt-4096-llama2-13b-chat \
    --langchain_mode=UserData
EOF
# File is user-owned; no sudo required to set the execute bit.
chmod u+x run_h2ogpt.sh
# Install the h2oGPT systemd unit.  Written via sudo tee so this step does
# not depend on /etc/systemd/system having been chown'd to ubuntu.
cd /etc/systemd/system
sudo tee h2ogpt.service >/dev/null <<'EOF'
[Unit]
Description=h2oGPT Server
After=network.target

[Service]
Type=simple
User=ubuntu
WorkingDirectory=/home/ubuntu/h2ogpt
ExecStart=bash /home/ubuntu/h2ogpt/run_h2ogpt.sh
# Consistent with the nginx and vLLM units: restart on crash, with backoff.
Restart=always
RestartSec=10

[Install]
WantedBy=multi-user.target
EOF
sudo systemctl daemon-reload
sudo systemctl enable h2ogpt.service
cd "$HOME"
# Reclaim disk space used during provisioning: HF model download cache,
# apt leftovers, and any downloaded .deb packages in $HOME.
# ${HOME:?} aborts instead of expanding to "/" if HOME is somehow unset.
sudo rm -rf -- "${HOME:?}/.cache/huggingface/hub/"
sudo DEBIAN_FRONTEND=noninteractive apt-get -y autoremove
sudo DEBIAN_FRONTEND=noninteractive apt-get -y clean
sudo rm -f -- *.deb