Update app.py
app.py CHANGED
@@ -1,4 +1,3 @@
-import deepspeed
 import torch
 from transformers import pipeline
 import os
@@ -24,21 +23,6 @@ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 print('Using device:', device)
 print()
 
-total_mem = 0
-if device.type == 'cuda':
-    print(torch.cuda.get_device_name(0))
-    total_mem = round(torch.cuda.get_device_properties(0).total_memory / 1024**3, 1)
-    print('Total Memory: ', total_mem, 'GB')
-
-should_replace_with_kernel_inject = total_mem >= 12
-print(f'should_replace_with_kernel_inject = {should_replace_with_kernel_inject}')
-
-ds_engine = deepspeed.init_inference(generator.model,
-                                     mp_size=world_size,
-                                     dtype=torch.half,
-                                     replace_with_kernel_inject=should_replace_with_kernel_inject)
-generator.model = ds_engine.module
-
 # Text generation function
 def chat_with_model(history):
     prompt = history[-1]["content"]
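Note: the removed block wrapped the transformers pipeline's model in DeepSpeed's inference engine, gating fused-kernel injection on available GPU memory. If that path is ever restored, a minimal self-contained sketch follows; `generator` and `world_size` are not defined in the hunks shown here, so the pipeline construction and the WORLD_SIZE environment lookup below are assumptions for illustration, not the Space's actual setup.

import os

import deepspeed
import torch
from transformers import pipeline

# Assumed setup (not shown in this diff): a text-generation pipeline and a
# world size read from the launcher's environment. The model name is a placeholder.
generator = pipeline('text-generation', model='gpt2')
world_size = int(os.getenv('WORLD_SIZE', '1'))

# The removed code enabled DeepSpeed's injected kernels only when the GPU
# has at least 12 GB of total memory.
total_mem = 0
if torch.cuda.is_available():
    total_mem = round(torch.cuda.get_device_properties(0).total_memory / 1024**3, 1)
should_replace_with_kernel_inject = total_mem >= 12

# Wrap the underlying model with DeepSpeed's inference engine and swap the
# optimized module back into the pipeline.
ds_engine = deepspeed.init_inference(generator.model,
                                     mp_size=world_size,
                                     dtype=torch.half,
                                     replace_with_kernel_inject=should_replace_with_kernel_inject)
generator.model = ds_engine.module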