lauro1 commited on
Commit
f70abfb
·
1 Parent(s): 7a9deb4

testing low_cpu

Browse files
Files changed (1) hide show
  1. app.py +4 -1
app.py CHANGED
@@ -1,7 +1,10 @@
1
  import gradio as gr
 
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
3
 
4
- model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B")
 
 
5
  tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
6
 
7
  def predict(msg, chat_history):
 
1
  import gradio as gr
2
+ import torch
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
4
 
5
+ model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", low_cpu_mem_usage=True)
6
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
7
+ model.to(device).eval()
8
  tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
9
 
10
  def predict(msg, chat_history):