openfree committed
Commit 89817e2 · verified · Parent(s): 03434f6

Update app.py

Files changed (1):
  app.py +31 -3
app.py CHANGED
@@ -1,12 +1,14 @@
 import re
 import threading
 import gc
+import os
 import torch
 
 import gradio as gr
 import spaces
 import transformers
 from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
+from huggingface_hub import login
 
 # Settings for model memory management and optimization
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -28,6 +30,17 @@ available_models = {
 pipe = None
 current_model_name = None
 
+# Try to log in with a Hugging Face token
+try:
+    hf_token = os.getenv("HF_TOKEN")
+    if hf_token:
+        login(token=hf_token)
+        print("Successfully logged in to Hugging Face.")
+    else:
+        print("Warning: the HF_TOKEN environment variable is not set.")
+except Exception as e:
+    print(f"Hugging Face login error: {str(e)}")
+
 # Marker used to detect the final answer
 ANSWER_MARKER = "**λ‹΅λ³€**"
 
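As a usage note (not part of this commit): login() registers the token process-wide, so later hub calls can pick it up implicitly. For contrast, a minimal sketch of the per-call alternative; the model id here is illustrative only:

    import os
    from transformers import AutoTokenizer

    # Without a global login(), the token can instead be passed on each call.
    # "gpt2" is a public model used purely for illustration; token=None is
    # accepted for public repos, so this also works when HF_TOKEN is unset.
    tokenizer = AutoTokenizer.from_pretrained("gpt2", token=os.getenv("HF_TOKEN"))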
@@ -143,6 +156,14 @@ def load_model(model_names):
 
     # Load the model (apply settings optimized for its size)
     try:
+        # Check the HF_TOKEN environment variable
+        hf_token = os.getenv("HF_TOKEN")
+        # Common parameters for every load path
+        common_params = {
+            "token": hf_token,  # token for gated models
+            "trust_remote_code": True,
+        }
+
         # Use BF16 precision (optimized for the A100)
         if config["quantization"]:
             # Apply quantization
@@ -159,9 +180,9 @@ def load_model(model_names):
             torch_dtype=DTYPE,
             quantization_config=quantization_config if config["quantization"] else None,
             offload_folder="offload" if config["offload"] else None,
-            trust_remote_code=True
+            **common_params
         )
-        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(model_name, **common_params)
 
         pipe = pipeline(
             "text-generation",
@@ -177,7 +198,7 @@ def load_model(model_names):
             model=model_name,
             device_map="auto",
             torch_dtype=DTYPE,
-            trust_remote_code=True
+            **common_params
         )
 
         current_model_name = model_name
@@ -414,5 +435,12 @@ if __name__ == "__main__":
         print(f"Current GPU: {torch.cuda.current_device()}")
         print(f"GPU name: {torch.cuda.get_device_name(0)}")
 
+    # Check the HF_TOKEN environment variable
+    hf_token = os.getenv("HF_TOKEN")
+    if hf_token:
+        print("The HF_TOKEN environment variable is set.")
+    else:
+        print("Warning: HF_TOKEN is not set; gated models will not be accessible.")
+
     # Enable the queue and launch the app
     demo.queue(max_size=10).launch()
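The startup check above only tests whether HF_TOKEN is set, not whether it is valid. If a stronger check is wanted, huggingface_hub can verify the token against the hub; a sketch (not part of this commit):

    import os
    from huggingface_hub import whoami

    try:
        user = whoami(token=os.getenv("HF_TOKEN"))  # raises if missing or invalid
        print(f"Token is valid for user: {user['name']}")
    except Exception as err:
        print(f"Token check failed: {err}")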
 