Yoxas committed
Commit 0853b21 · verified · 1 Parent(s): 384b004

Update app.py

Files changed (1)
  1. app.py +13 -11
app.py CHANGED
@@ -1,15 +1,18 @@
 import gradio as gr
 from transformers import pipeline
 import spaces
+import sentencepiece
 
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 try:
-    pipe = pipeline("text-generation", model="microsoft/Phi-3-mini-4k-instruct", framework="pt", trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained("Yoxas/autotrain-phi3-statistical", trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained("Yoxas/autotrain-phi3-statistical", trust_remote_code=True)
 except Exception as e:
     print(f"Error loading model: {e}")
-    pipe = None
+    tokenizer = None
+    model = None
 
 @spaces.GPU(duration=120)
 def respond(
@@ -20,6 +23,9 @@ def respond(
     temperature,
     top_p,
 ):
+    if tokenizer is None or model is None:
+        return "Error: Model not loaded properly."
+
     messages = [{"role": "system", "content": system_message}]
 
     for val in history:
@@ -30,17 +36,13 @@ def respond(
 
     messages.append({"role": "user", "content": message})
 
+    inputs = tokenizer(messages, return_tensors="pt", padding=True, truncation=True)
+
     response = ""
 
-    for message in pipe(
-        messages,
-        max_length=max_tokens,
-        do_sample=True,
-        temperature=temperature,
-        top_p=top_p,
-        return_dict_in_generate=True,
-    ):
-        token = message["generated_text"]
+    for i in range(max_tokens):
+        outputs = model.generate(inputs.input_ids, attention_mask=inputs.attention_mask, max_length=inputs.input_ids.shape[-1] + 1, temperature=temperature, top_p=top_p)
+        token = tokenizer.decode(outputs[0, -1:], skip_special_tokens=True)
 
         response += token
         yield response
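As committed, this revision has three issues worth flagging: `AutoTokenizer` and `AutoModelForCausalLM` are called in the `try:` block but never imported (only `pipeline` is); `tokenizer(messages, ...)` receives a list of chat dicts rather than strings, which raises an error instead of encoding the conversation; and the loop passes the original `inputs` to `model.generate` on every iteration with `max_length` one past the prompt, so it re-samples the first token `max_tokens` times instead of extending the sequence. Below is a minimal sketch of a corrected `respond`, assuming the `Yoxas/autotrain-phi3-statistical` checkpoint ships a chat template; names and the tuple-style `history` follow the commit.

# Sketch only: corrected loading and streaming for this commit's app.py.
import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer  # missing in the commit

try:
    tokenizer = AutoTokenizer.from_pretrained("Yoxas/autotrain-phi3-statistical", trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained("Yoxas/autotrain-phi3-statistical", trust_remote_code=True)
except Exception as e:
    print(f"Error loading model: {e}")
    tokenizer = None
    model = None

@spaces.GPU(duration=120)
def respond(message, history, system_message, max_tokens, temperature, top_p):
    # yield, not return: respond is a generator, so a bare return value is lost.
    if tokenizer is None or model is None:
        yield "Error: Model not loaded properly."
        return

    messages = [{"role": "system", "content": system_message}]
    for user_turn, bot_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if bot_turn:
            messages.append({"role": "assistant", "content": bot_turn})
    messages.append({"role": "user", "content": message})

    # Chat messages must go through the model's chat template;
    # tokenizer(messages, ...) on a list of dicts does not do this.
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    response = ""
    for _ in range(max_tokens):
        # Feed the extended sequence back in each step; the committed loop
        # reuses the original prompt, so every step re-samples the same token.
        outputs = model.generate(
            input_ids,
            max_new_tokens=1,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            pad_token_id=tokenizer.eos_token_id,
        )
        next_ids = outputs[0, -1:]
        if next_ids.item() == tokenizer.eos_token_id:
            break
        input_ids = outputs  # carry the new token into the next iteration
        response += tokenizer.decode(next_ids, skip_special_tokens=True)
        yield response

The one-token-per-`generate` loop above only stays close to the committed structure; a more idiomatic way to stream from a Space is a single `generate` call on a background thread with `transformers.TextIteratorStreamer`. Note also that `import sentencepiece` does nothing at runtime here: the tokenizer pulls the package in indirectly, so it only needs to be installed (e.g. listed in requirements.txt).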