malvika2003 committed · Commit 2ecd7b6 · verified · 1 Parent(s): 620a079

Update app.py
Files changed (1)
  1. app.py +6 -11
app.py CHANGED
@@ -1,6 +1,5 @@
 import os
-from transformers import AutoTokenizer
-from optimum.intel.openvino import OVModelForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
 from threading import Thread
 from time import perf_counter
@@ -9,7 +8,7 @@ from transformers import TextIteratorStreamer
 import numpy as np
 
 # Model configuration and loading
-model_dir = "C:/Users/KIIT/OneDrive/Desktop/INTEL/phi-2/INT8_compressed_weights"
+model_dir = "phi-2/INT8_compressed_weights"
 model_name = "susnato/phi-2"
 model_configuration = {
     "prompt_template": "{instruction}",
@@ -17,15 +16,10 @@ model_configuration = {
     "response_key": "### Response",
     "end_key": "### End"
 }
-ov_config = {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "CACHE_DIR": ""}
 
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-ov_model = OVModelForCausalLM.from_pretrained(
-    model_dir,
-    device="CPU",
-    ov_config=ov_config,
-)
+model = AutoModelForCausalLM.from_pretrained(model_dir)
 tokenizer_kwargs = model_configuration.get("toeknizer_kwargs", {})
 response_key = model_configuration.get("response_key")
 tokenizer_response_key = None
@@ -95,7 +89,7 @@ def run_generation(
         eos_token_id=end_key_token_id,
         pad_token_id=pad_token_id,
     )
-    t = Thread(target=ov_model.generate, kwargs=generate_kwargs)
+    t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
     model_output = ""
     per_token_time = []
@@ -128,7 +122,7 @@ examples = [
 def main():
     with gr.Blocks() as demo:
         gr.Markdown(
-            "# Question Answering with Model and OpenVINO.\n"
+            "# Question Answering with Model.\n"
             "Provide instruction which describes a task below or select among predefined examples and model writes response that performs requested task."
         )
 
@@ -202,3 +196,4 @@ def main():
 
 if __name__ == "__main__":
     main()
+
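
For reference, below is a minimal, self-contained sketch of the load-and-stream pattern the updated app.py relies on after this commit: AutoModelForCausalLM replaces the OpenVINO-specific OVModelForCausalLM, and generate() runs on a background thread while a TextIteratorStreamer yields tokens. The helper name stream_response, the max_new_tokens default, and the use of tokenizer.eos_token_id as pad_token_id are illustrative assumptions, not taken from the app itself.

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_dir = "phi-2/INT8_compressed_weights"   # local weights directory used in the diff
model_name = "susnato/phi-2"                  # tokenizer checkpoint used in the diff

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_dir)


def stream_response(instruction: str, max_new_tokens: int = 256) -> str:
    # Hypothetical helper; the real app assembles generate_kwargs inside run_generation().
    inputs = tokenizer(instruction, return_tensors="pt")
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.eos_token_id,  # assumption; the app derives its own end-key token id
    )
    # generate() runs on a worker thread; tokens are consumed here as they are produced.
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()
    output = ""
    for new_text in streamer:
        output += new_text
    thread.join()
    return output

Running generate() on a worker thread is what lets the Gradio callback iterate over the streamer and update the UI token by token instead of blocking until generation finishes.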