Update app.py
app.py CHANGED
@@ -14,12 +14,30 @@ def load_models():
     # Text embedding model
     embed_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
 
-
-
-
+    TOKEN = "TOKEN"
+
+    # IBM Granite models with proper token and trust_remote_code settings
+    summary_tokenizer = AutoTokenizer.from_pretrained(
+        "ibm/granite-13b-instruct-v2",
+        token=TOKEN,
+        trust_remote_code=True
+    )
+    summary_model = AutoModelForCausalLM.from_pretrained(
+        "ibm/granite-13b-instruct-v2",
+        token=TOKEN,
+        trust_remote_code=True
+    )
 
-    qa_tokenizer = AutoTokenizer.from_pretrained(
-
+    qa_tokenizer = AutoTokenizer.from_pretrained(
+        "ibm/granite-13b-instruct-v2",
+        token=TOKEN,
+        trust_remote_code=True
+    )
+    qa_model = AutoModelForCausalLM.from_pretrained(
+        "ibm/granite-13b-instruct-v2",
+        token=TOKEN,
+        trust_remote_code=True
+    )
 
     return embed_model, summary_model, summary_tokenizer, qa_model, qa_tokenizer
 
@@ -30,16 +48,13 @@ def process_file(uploaded_file):
     if file_type == 'pdf':
         pdf_reader = PdfReader(uploaded_file)
         for page in pdf_reader.pages:
-            text += page.extract_text()
-
+            text += page.extract_text() or ""
     elif file_type == 'txt':
         text = uploaded_file.read().decode('utf-8')
-
     elif file_type == 'docx':
         doc = Document(uploaded_file)
         for para in doc.paragraphs:
             text += para.text + "\n"
-
     return clean_text(text)
 
 def clean_text(text):
@@ -111,4 +126,4 @@ def main():
         st.info(f"Answer: {answer}")
 
 if __name__ == "__main__":
-    main()
+    main()
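For reference, below is a minimal sketch, not part of this commit, of how the qa_model and qa_tokenizer returned by load_models() might be used to answer a question over the text produced by process_file(). The generate_answer helper name, the prompt format, and the max_new_tokens value are assumptions for illustration only.

# Hypothetical helper (not in this commit): one way to query the loaded
# Granite causal LM with a document context and a user question.
def generate_answer(qa_model, qa_tokenizer, context, question, max_new_tokens=128):
    prompt = f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"
    inputs = qa_tokenizer(prompt, return_tensors="pt")
    output_ids = qa_model.generate(**inputs, max_new_tokens=max_new_tokens)
    # Keep only the tokens generated after the prompt before decoding
    new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]
    return qa_tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

In the Streamlit flow, a value produced this way is presumably what ends up in st.info(f"Answer: {answer}").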