Update app.py
Browse files
app.py
CHANGED
@@ -11,10 +11,9 @@ MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
-
 #if torch.cuda.is_available():
 model_id = "meta-llama/Llama-2-7b-chat-hf"
-model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
+model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, trust_remote_code=True, token=HF_TOKEN, device_map="auto")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 tokenizer.use_default_system_prompt = False
 