Rajesh3338 committed
Commit 86930a3 · verified · 1 Parent(s): 6ebaa39

Update app.py

Files changed (1):
  1. app.py +11 -17
app.py CHANGED
@@ -1,6 +1,5 @@
 import gradio as gr
 import torch
-import spaces
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.document_loaders import TextLoader
 from langchain_community.vectorstores import FAISS
@@ -8,6 +7,10 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.chains import RetrievalQA
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from langchain_huggingface import HuggingFacePipeline
+import spaces
+zero = torch.Tensor([0]).cuda()
+print(zero.device) # This will likely print 'cpu'
+device = "cuda" if torch.cuda.is_available() else "cpu"
 
 # Load and process the document
 doc_loader = TextLoader("dataset.txt")
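The four added lines are the standard Hugging Face ZeroGPU probe: on ZeroGPU hardware, CUDA calls made at import time are intercepted, so the tensor reports 'cpu' until it is used inside a function decorated with @spaces.GPU. A minimal sketch of that pattern, adapted from the ZeroGPU template this probe appears to come from (the function name greet is illustrative):

    import spaces
    import torch

    zero = torch.Tensor([0]).cuda()
    print(zero.device)  # at import time on ZeroGPU this prints 'cpu'

    @spaces.GPU
    def greet(n):
        # inside a @spaces.GPU call the GPU is actually attached
        print(zero.device)  # now 'cuda:0'
        return f"Hello {zero + n} Tensor"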
@@ -22,18 +25,11 @@ vectordb = FAISS.from_documents(split_docs, embeddings)
 # Load model and tokenizer
 model_name = "01-ai/Yi-Coder-9B-Chat"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-@spaces.GPU
-def setup_model():
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        device_map="auto",
-        torch_dtype=torch.float16 if device.type == "cuda" else torch.float32
-    )
-    return model, device
-
-model, device = setup_model()
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    device_map="auto",
+    torch_dtype=torch.float16 if device == "cuda" else torch.float32
+)
 
 # Set up the QA pipeline
 qa_pipeline = pipeline(
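With the @spaces.GPU wrapper gone from the loading path, the model is now loaded once at import time. device_map="auto" delegates weight placement to accelerate, which shards the checkpoint across whatever devices are visible, so no explicit .to(device) call is needed; the module-level device string only picks the dtype. For comparison, a manual-placement sketch without accelerate (not what the commit does; note that a 9B model in float16 still needs roughly 18 GB of memory):

    import torch
    from transformers import AutoModelForCausalLM

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = AutoModelForCausalLM.from_pretrained(
        "01-ai/Yi-Coder-9B-Chat",
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    )
    model.to(device)  # manual placement; device_map="auto" does this (plus sharding) for you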
@@ -41,8 +37,7 @@ qa_pipeline = pipeline(
     model=model,
     tokenizer=tokenizer,
     max_new_tokens=750,
-    pad_token_id=tokenizer.eos_token_id,
-    device=device
+    pad_token_id=tokenizer.eos_token_id
 )
 
 llm = HuggingFacePipeline(pipeline=qa_pipeline)
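Dropping device=device is the substantive fix in this hunk: once a model has been dispatched with device_map="auto", recent transformers releases warn or raise a ValueError if pipeline() also receives a device argument, since an accelerate-dispatched model cannot be moved again. The pipeline simply inherits the model's placement. The resulting call, with the task string filled in as an assumption (the hunk starts after the first argument, so the task is not visible in the diff):

    from transformers import pipeline

    qa_pipeline = pipeline(
        "text-generation",   # assumed task; not shown in the diff
        model=model,         # already placed by device_map="auto" above
        tokenizer=tokenizer,
        max_new_tokens=750,
        pad_token_id=tokenizer.eos_token_id,
        # no device=... here: the pipeline follows the model's existing placement
    )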
@@ -67,7 +62,6 @@ def clean_response(response):
         return result.split("Answer:")[1].strip()
     return result.strip()
 
-@spaces.GPU
 def chatbot_response(user_input):
     processed_query = preprocess_query(user_input)
     raw_response = qa_chain.invoke({"query": processed_query})
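This hunk removes the decorator from the request handler, leaving spaces imported but no function decorated with @spaces.GPU. On ZeroGPU hardware the GPU is attached only for the duration of a decorated call, so if inference is meant to run on the GPU, the handler would normally keep the decorator; a sketch of that variant (the return line is an assumed continuation, since the hunk truncates the function body):

    @spaces.GPU  # attach the GPU for the duration of this call (ZeroGPU only)
    def chatbot_response(user_input):
        processed_query = preprocess_query(user_input)
        raw_response = qa_chain.invoke({"query": processed_query})
        return clean_response(raw_response)  # assumed; not visible in the hunk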
@@ -90,4 +84,4 @@ with gr.Blocks() as chat_interface:
 
 # Launch the interface
 if __name__ == "__main__":
-    chat_interface.launch()
+    chat_interface.launch()
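The final hunk rewrites the launch() line without changing its text, which points to a whitespace-only edit. One variant worth sketching for ZeroGPU Spaces (an option, not what the commit does): queue() makes concurrent requests wait their turn for the GPU instead of piling onto it.

    if __name__ == "__main__":
        # plain chat_interface.launch() is what the commit keeps;
        # queue() serializes requests, which suits per-call GPU attachment
        chat_interface.queue().launch()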
 