sunbal7 committed on
Commit
7bea7bd
·
verified ·
1 Parent(s): c3cc01e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -10
app.py CHANGED
@@ -14,12 +14,30 @@ def load_models():
14
  # Text embedding model
15
  embed_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
16
 
17
- # IBM Granite models
18
- summary_tokenizer = AutoTokenizer.from_pretrained("ibm/granite-13b-instruct-v2")
19
- summary_model = AutoModelForCausalLM.from_pretrained("ibm/granite-13b-instruct-v2")
 
 
 
 
 
 
 
 
 
 
20
 
21
- qa_tokenizer = AutoTokenizer.from_pretrained("ibm/granite-13b-instruct-v2")
22
- qa_model = AutoModelForCausalLM.from_pretrained("ibm/granite-13b-instruct-v2")
 
 
 
 
 
 
 
 
23
 
24
  return embed_model, summary_model, summary_tokenizer, qa_model, qa_tokenizer
25
 
@@ -30,16 +48,13 @@ def process_file(uploaded_file):
30
  if file_type == 'pdf':
31
  pdf_reader = PdfReader(uploaded_file)
32
  for page in pdf_reader.pages:
33
- text += page.extract_text()
34
-
35
  elif file_type == 'txt':
36
  text = uploaded_file.read().decode('utf-8')
37
-
38
  elif file_type == 'docx':
39
  doc = Document(uploaded_file)
40
  for para in doc.paragraphs:
41
  text += para.text + "\n"
42
-
43
  return clean_text(text)
44
 
45
  def clean_text(text):
@@ -111,4 +126,4 @@ def main():
111
  st.info(f"Answer: {answer}")
112
 
113
  if __name__ == "__main__":
114
- main()
 
14
  # Text embedding model
15
  embed_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
16
 
17
+ TOKEN = "TOKEN"
18
+
19
+ # IBM Granite models with proper token and trust_remote_code settings
20
+ summary_tokenizer = AutoTokenizer.from_pretrained(
21
+ "ibm/granite-13b-instruct-v2",
22
+ token=TOKEN,
23
+ trust_remote_code=True
24
+ )
25
+ summary_model = AutoModelForCausalLM.from_pretrained(
26
+ "ibm/granite-13b-instruct-v2",
27
+ token=TOKEN,
28
+ trust_remote_code=True
29
+ )
30
 
31
+ qa_tokenizer = AutoTokenizer.from_pretrained(
32
+ "ibm/granite-13b-instruct-v2",
33
+ token=TOKEN,
34
+ trust_remote_code=True
35
+ )
36
+ qa_model = AutoModelForCausalLM.from_pretrained(
37
+ "ibm/granite-13b-instruct-v2",
38
+ token=TOKEN,
39
+ trust_remote_code=True
40
+ )
41
 
42
  return embed_model, summary_model, summary_tokenizer, qa_model, qa_tokenizer
43
 
 
48
  if file_type == 'pdf':
49
  pdf_reader = PdfReader(uploaded_file)
50
  for page in pdf_reader.pages:
51
+ text += page.extract_text() or ""
 
52
  elif file_type == 'txt':
53
  text = uploaded_file.read().decode('utf-8')
 
54
  elif file_type == 'docx':
55
  doc = Document(uploaded_file)
56
  for para in doc.paragraphs:
57
  text += para.text + "\n"
 
58
  return clean_text(text)
59
 
60
  def clean_text(text):
 
126
  st.info(f"Answer: {answer}")
127
 
128
  if __name__ == "__main__":
129
+ main()