Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -1,12 +1,14 @@
 import re
 import threading
 import gc
+import os
 import torch
 
 import gradio as gr
 import spaces
 import transformers
 from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
+from huggingface_hub import login
 
 # Settings for model memory management and optimization
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -28,6 +30,17 @@ available_models = {
 pipe = None
 current_model_name = None
 
+# Attempt to log in with a Hugging Face token
+try:
+    hf_token = os.getenv("HF_TOKEN")
+    if hf_token:
+        login(token=hf_token)
+        print("Successfully logged in to Hugging Face.")
+    else:
+        print("Warning: the HF_TOKEN environment variable is not set.")
+except Exception as e:
+    print(f"Hugging Face login error: {str(e)}")
+
 # Marker for detecting the final answer
 ANSWER_MARKER = "**답변**"
 
@@ -143,6 +156,14 @@ def load_model(model_names):
 
     # Load the model (apply settings optimized for its size)
    try:
+        # Check the HF_TOKEN environment variable
+        hf_token = os.getenv("HF_TOKEN")
+        # Common parameters
+        common_params = {
+            "token": hf_token,  # token for access-restricted models
+            "trust_remote_code": True,
+        }
+
         # Use BF16 precision (optimized for A100)
         if config["quantization"]:
             # Apply quantization
@@ -159,9 +180,9 @@ def load_model(model_names):
             torch_dtype=DTYPE,
             quantization_config=quantization_config if config["quantization"] else None,
             offload_folder="offload" if config["offload"] else None,
-
+            **common_params
         )
-        tokenizer = AutoTokenizer.from_pretrained(model_name,
+        tokenizer = AutoTokenizer.from_pretrained(model_name, **common_params)
 
         pipe = pipeline(
             "text-generation",
@@ -177,7 +198,7 @@ def load_model(model_names):
             model=model_name,
             device_map="auto",
             torch_dtype=DTYPE,
-
+            **common_params
         )
 
         current_model_name = model_name
@@ -414,5 +435,12 @@ if __name__ == "__main__":
     print(f"Current GPU: {torch.cuda.current_device()}")
     print(f"GPU name: {torch.cuda.get_device_name(0)}")
 
+    # Check the HF_TOKEN environment variable
+    hf_token = os.getenv("HF_TOKEN")
+    if hf_token:
+        print("The HF_TOKEN environment variable is set.")
+    else:
+        print("Warning: the HF_TOKEN environment variable is not set. Restricted models cannot be accessed.")
+
     # Use the queue and launch the app
     demo.queue(max_size=10).launch()
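Taken together, the change threads an HF_TOKEN secret through three places: a login attempt at import time, a shared common_params dict ("token" plus "trust_remote_code") that is unpacked into every from_pretrained/pipeline call, and a startup check that warns when the variable is missing. The following is a minimal standalone sketch of that pattern, not the Space's actual code; the model id is hypothetical and HF_TOKEN is assumed to be provided as an environment variable.

import os

import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Read the token from the environment (on Spaces this usually comes from a secret);
# it may be None when running locally without authentication.
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(token=hf_token)  # authenticate this process for gated/private repos

# Shared download kwargs, mirroring the diff's common_params dict.
common_params = {
    "token": hf_token,          # forwarded to the Hub for access-restricted models
    "trust_remote_code": True,  # allow repos that ship custom modeling code
}

model_name = "some-org/some-gated-model"  # hypothetical model id

tokenizer = AutoTokenizer.from_pretrained(model_name, **common_params)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    **common_params,
)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
print(pipe("Hello", max_new_tokens=20)[0]["generated_text"])

For public models the same calls work with the token left unset; only gated or private repositories need it. On Spaces the token would typically be stored as a secret in the Space settings so that it is exposed to the running app as an environment variable.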