import gc

import torch
from binoculars import Binoculars
# Observer/performer model pairs for the Binoculars detector:
# the chat pair targets natural-language text, the coder pair targets code.
CHAT_MODEL_PAIR = {
    "observer": "deepseek-ai/deepseek-llm-7b-base",
    "performer": "deepseek-ai/deepseek-llm-7b-chat",
}
CODER_MODEL_PAIR = {
    "observer": "deepseek-ai/deepseek-llm-7b-base",
    "performer": "deepseek-ai/deepseek-coder-7b-instruct-v1.5",
}
def initialize_chat_model():
    """Load the chat Binoculars pair, logging GPU memory before and after."""
    print("Initializing chat Binoculars model...")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        print(f"GPU Memory before chat model: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
    bino_chat = Binoculars(
        mode="accuracy",
        observer_name_or_path=CHAT_MODEL_PAIR["observer"],
        performer_name_or_path=CHAT_MODEL_PAIR["performer"],
        max_token_observed=2048,
    )
    if torch.cuda.is_available():
        print(f"GPU Memory after chat model: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
    return bino_chat
def initialize_coder_model():
    """Load the coder Binoculars pair, logging GPU memory before and after."""
    print("Initializing coder Binoculars model...")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        print(f"GPU Memory before coder model: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
    bino_coder = Binoculars(
        mode="accuracy",
        observer_name_or_path=CODER_MODEL_PAIR["observer"],
        performer_name_or_path=CODER_MODEL_PAIR["performer"],
        max_token_observed=2048,
    )
    if torch.cuda.is_available():
        print(f"GPU Memory after coder model: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
    return bino_coder
def compute_chat_score(text):
    """Score `text` with the chat pair; the model is freed even if scoring fails."""
    print("Computing chat score...")
    bino_chat = initialize_chat_model()
    try:
        score_chat = bino_chat.compute_score(text)
        return {"score_chat": score_chat}
    finally:
        cleanup_model(bino_chat)
def compute_coder_score(text):
    """Score `text` with the coder pair; the model is freed even if scoring fails."""
    print("Computing coder score...")
    bino_coder = initialize_coder_model()
    try:
        score_coder = bino_coder.compute_score(text)
        return {"score_coder": score_coder}
    finally:
        cleanup_model(bino_coder)
def compute_scores(text, use_chat=True, use_coder=True):
    """Compute the requested scores, loading one model pair at a time to limit GPU memory."""
    scores = {}
    if use_chat:
        scores.update(compute_chat_score(text))
    if use_coder:
        scores.update(compute_coder_score(text))
    return scores
def cleanup_model(model):
    """Release a model's GPU memory and force garbage collection."""
    if model:
        try:
            print("Cleaning up model resources...")
            model.free_memory()
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.synchronize()
                print(f"After cleanup: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
        except Exception as e:
            print(f"Error during model cleanup: {e}")
def cleanup_models(bino_chat, bino_coder):
    """Free both model pairs, skipping any that were never initialized."""
    if bino_chat:
        cleanup_model(bino_chat)
    if bino_coder:
        cleanup_model(bino_coder)
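

# Minimal usage sketch (not part of the module's API): the sample text is a
# hypothetical input, and the interpretation comment reflects the Binoculars
# convention that scores below the mode's threshold lean AI-generated.
if __name__ == "__main__":
    sample = "The quick brown fox jumps over the lazy dog."  # illustrative only
    results = compute_scores(sample, use_chat=True, use_coder=False)
    print(results)  # e.g. {"score_chat": ...}; lower scores suggest AI-generated text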