tomaszki committed

Commit 8f6976a · 1 Parent(s): 7633fe1

Support for CPU

Files changed (1)
  1. app.py +7 -7
app.py CHANGED
@@ -8,17 +8,17 @@ import numpy as np
 import numpyAc
 
 st.set_page_config(layout="wide")
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
 @st.cache_resource
 def load_model():
     return AutoModelForCausalLM.from_pretrained(
-        "codellama/CodeLlama-7b-Python-hf",
-        device_map='auto',
-    )
+        "PY007/TinyLlama-1.1B-python-v0.1",
+    ).to(device)
 
 @st.cache_resource
 def load_tokenizer():
-    return AutoTokenizer.from_pretrained("codellama/CodeLlama-7b-Python-hf")
+    return AutoTokenizer.from_pretrained("PY007/TinyLlama-1.1B-python-v0.1")
 
 model = load_model()
 tokenizer = load_tokenizer()
@@ -30,7 +30,7 @@ encode_col, decode_col = st.columns(2, gap='medium')
 def encode(text):
     bar = st.progress(0.0)
     codec = numpyAc.arithmeticCoding()
-    tokenized = tokenizer(text, return_tensors='pt').input_ids.to('cuda')
+    tokenized = tokenizer(text, return_tensors='pt').input_ids.to(device)
     output = list()
     past_key_values = None
 
@@ -49,7 +49,7 @@ def encode(text):
         output.append(logits)
     output = torch.cat(output, dim=0)
     output = F.softmax(output, dim=-1)
-    tokenized = torch.cat((tokenized.squeeze()[1:], torch.tensor([2], device='cuda'))) # Add EOS
+    tokenized = torch.cat((tokenized.squeeze()[1:], torch.tensor([2], device=device))) # Add EOS
     tokenized = tokenized.type(torch.int16).cpu().numpy()
     byte_stream, _ = codec.encode(output.cpu(), tokenized)
     return byte_stream
@@ -64,7 +64,7 @@ def decode(byte_stream):
     while input_ids[-1] != 2:
         with torch.no_grad():
             output = model(
-                input_ids=torch.tensor([input_ids[-1:]], device='cuda'),
+                input_ids=torch.tensor([input_ids[-1:]], device=device),
                 use_cache=True,
                 past_key_values=past_key_values
             )
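
For reference, the pattern this commit adopts is the standard torch device probe: pick 'cuda' only when a GPU is actually visible, load the checkpoint without device_map='auto', and move the model and every input tensor to the chosen device explicitly. A minimal standalone sketch of the same idea, assuming only torch and transformers are installed (the prompt string is illustrative):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Probe for a GPU once at startup; fall back to CPU when none is visible.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = AutoModelForCausalLM.from_pretrained("PY007/TinyLlama-1.1B-python-v0.1").to(device)
tokenizer = AutoTokenizer.from_pretrained("PY007/TinyLlama-1.1B-python-v0.1")

# Inputs must live on the same device as the model weights.
input_ids = tokenizer("def fib(n):", return_tensors='pt').input_ids.to(device)
with torch.no_grad():
    logits = model(input_ids).logits  # next-token scores, shape (1, seq_len, vocab)

Reusing one module-level device variable in place of every hard-coded 'cuda' is what lets encode() and decode() run unchanged on CPU-only machines; swapping the 7B CodeLlama checkpoint for the 1.1B TinyLlama one presumably keeps CPU inference tractable.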