husseinhug321 committed
Commit 6f5ce4c · verified · 1 Parent(s): 0551174

Upload 4 files

Files changed (4)
  1. app.py +165 -0
  2. chains.py +47 -0
  3. local.py +70 -0
  4. prompts.py +53 -0
app.py ADDED
@@ -0,0 +1,165 @@
import logging

import gradio as gr

from config import SHARE_GRADIO_WITH_PUBLIC_URL
from chains import qa_chain, summarization_chain

logger = logging.getLogger(__name__)

# Translation dictionary
TRANSLATIONS = {
    "en": {
        "title": "# 📚 Study Buddy: AI Learning Assistant",
        "subtitle": "## 🤖 A smart, user-friendly chatbot for students!",
        "summary_subtitle": "## 📄 Upload Notes for Summarization",
        "chat_input_label": "Type your question here:",
        "chat_placeholder": "e.g., Explain Newton's laws",
        "chat_button_label": "Get Answer",
        "summary_button_label": "Summarize Notes",
        "upload_file_label": "Upload a .txt or .pdf file",
        "summary_output_label": "Summary",
        "language_label": "Language / Langue",
        "ai_response_label": "AI Response",
        "error_message": "Sorry, an error occurred while processing your request.",
    },
    "fr": {
        "title": "# 📚 Study Buddy : Assistant d'apprentissage IA",
        "subtitle": "## 🤖 Un chatbot intelligent et convivial pour les étudiants !",
        "summary_subtitle": "## 📄 Téléversez des notes à résumer",
        "chat_input_label": "Tapez votre question ici :",
        "chat_placeholder": "ex : Expliquez les lois de Newton",
        "chat_button_label": "Obtenir une réponse",
        "summary_button_label": "Résumer les notes",
        "upload_file_label": "Téléversez un fichier .txt ou .pdf",
        "summary_output_label": "Résumé",
        "language_label": "Langue / Language",
        "ai_response_label": "Réponse de l'IA",
        "error_message": "Désolé, une erreur s'est produite lors du traitement de votre demande.",
    },
}

# Process a user question through the QA chain
def chatbot_response(user_input, lang):
    try:
        response = qa_chain.invoke({"question": user_input})
        logger.info("chatbot_response completed")
        return response
    except Exception as e:
        logger.exception(f"Error: {e}")
        return TRANSLATIONS[lang].get("error_message", "Sorry, an error occurred while processing your request.")

# Summarize an uploaded file. gr.File hands the callback a path to a
# temporary copy of the upload, not the text itself, so the file must be
# read before invoking the chain. PDF extraction assumes the pypdf
# package is available.
def summarize_text(file_path, lang):
    try:
        if file_path.lower().endswith(".pdf"):
            from pypdf import PdfReader  # assumed dependency for PDF uploads
            reader = PdfReader(file_path)
            text = "\n".join(page.extract_text() or "" for page in reader.pages)
        else:
            with open(file_path, encoding="utf-8") as f:
                text = f.read()
        summary = summarization_chain.invoke({"document_text": text})
        logger.info("summarize_text completed")
        return summary
    except Exception as e:
        logger.exception(f"Error: {e}")
        return TRANSLATIONS[lang].get("error_message", "Sorry, an error occurred while summarizing your notes.")

# Update UI labels dynamically when the language changes. Markdown and
# Button components take their new text as a plain return value; for the
# Textboxes, gr.update changes the label/placeholder without clearing
# whatever the user has typed.
def update_labels(lang):
    t = TRANSLATIONS[lang]
    return (
        t["title"],
        t["subtitle"],
        t["summary_subtitle"],
        gr.update(label=t["chat_input_label"], placeholder=t["chat_placeholder"]),
        t["chat_button_label"],
        t["summary_button_label"],
        gr.update(label=t["summary_output_label"]),
        gr.update(label=t["ai_response_label"]),
        t["upload_file_label"],
    )

# Gradio UI
def create_interface():
    with gr.Blocks(css="body { font-family: sans-serif; background-color: #f9f9f9; }") as study_buddy:

        # Default to English
        lang = "en"

        title = gr.Markdown(TRANSLATIONS[lang]["title"])

        with gr.Row():
            with gr.Column():
                gr.Markdown("", height=4)

                language = gr.Radio(
                    choices=["en", "fr"],
                    value=lang,
                    label=TRANSLATIONS[lang]["language_label"]
                )

                gr.Markdown("", height=4)

                subtitle = gr.Markdown(TRANSLATIONS[lang]["subtitle"])

                chat_input = gr.Textbox(
                    label=TRANSLATIONS[lang]["chat_input_label"],
                    lines=4,
                    placeholder=TRANSLATIONS[lang]["chat_placeholder"]
                )

            with gr.Column():
                gr.Markdown("", height=4)
                summary_subtitle = gr.Markdown(TRANSLATIONS[lang]["summary_subtitle"])
                file_input = gr.File(file_types=[".pdf", ".txt"])
                file_label = gr.Markdown(TRANSLATIONS[lang]["upload_file_label"])  # Separate label below the file widget

        with gr.Row():
            with gr.Column():
                chat_button = gr.Button(TRANSLATIONS[lang]["chat_button_label"], variant="primary")
                chat_output = gr.Textbox(label=TRANSLATIONS[lang]["ai_response_label"], lines=5, interactive=True)

                # Bind chatbot response function
                chat_button.click(
                    chatbot_response,
                    inputs=[chat_input, language],
                    outputs=chat_output
                )

            with gr.Column():
                summary_button = gr.Button(TRANSLATIONS[lang]["summary_button_label"], variant="primary")
                summary_output = gr.Textbox(label=TRANSLATIONS[lang]["summary_output_label"], lines=5, interactive=True)

                # Bind summarization function
                summary_button.click(
                    summarize_text,
                    inputs=[file_input, language],
                    outputs=summary_output
                )

        # Refresh the labels when the language changes; the outputs below
        # line up one-to-one with the tuple returned by update_labels.
        language.change(
            update_labels,
            inputs=[language],
            outputs=[
                title, subtitle, summary_subtitle,
                chat_input,
                chat_button, summary_button,
                summary_output, chat_output,
                file_label
            ]
        )

    return study_buddy

if __name__ == "__main__":
    study_buddy = create_interface()
    study_buddy.launch(share=SHARE_GRADIO_WITH_PUBLIC_URL)
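
Note that none of the uploaded modules configures Python logging, so the module-level logger.info / logger.exception calls above are silent by default. A minimal setup sketch (an assumption, not part of this upload) that could sit near the top of app.py:

import logging

# Assumed setup: route INFO-level records from app.py, chains.py, and
# local.py to stderr so their logger calls are actually visible.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(name)s %(levelname)s: %(message)s",
)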
chains.py ADDED
@@ -0,0 +1,47 @@
import logging

from local import llm  # local.py sits at the repo root in this upload
# from llms.openai import llm
from prompts import QA_PROMPT, SUMMARIZATION_PROMPT

logger = logging.getLogger(__name__)

def get_qa_chain():
    logger.info("Creating QA chain")
    print("> Creating QA chain")

    try:
        # LCEL pipe syntax: feed the rendered prompt into the LLM
        qa_chain = QA_PROMPT | llm

        logger.info("QA chain created")
        print("> QA chain created")
        return qa_chain

    except Exception as e:
        msg = f"Error: {e}"
        logger.exception(msg)
        print(msg)
        raise

def get_summarization_chain():
    logger.info("Creating summarization chain")
    print("> Creating summarization chain")

    try:
        summarization_chain = SUMMARIZATION_PROMPT | llm

        logger.info("Summarization chain created")
        print("> Summarization chain created")
        return summarization_chain

    except Exception as e:
        msg = f"Error: {e}"
        logger.exception(msg)
        print(msg)
        raise

qa_chain = get_qa_chain()
summarization_chain = get_summarization_chain()
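
For a quick sanity check of both chains outside the Gradio UI, each can be invoked with a dict keyed by its prompt's input variable (hypothetical usage; the inputs are just examples):

if __name__ == "__main__":
    # Both chains return the generated string from the HF pipeline.
    print(qa_chain.invoke({"question": "State Newton's second law."}))
    print(summarization_chain.invoke({
        "document_text": "Photosynthesis converts light energy into chemical energy stored as glucose."
    }))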
local.py ADDED
@@ -0,0 +1,70 @@
import logging

from dotenv import load_dotenv
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)

from langchain_huggingface import HuggingFacePipeline
from langchain.globals import set_debug, set_verbose

from config import HF_MODEL_ID, LLM_VERBOSE

set_verbose(LLM_VERBOSE)
set_debug(LLM_VERBOSE)

logger = logging.getLogger(__name__)
load_dotenv()

cuda_check = torch.cuda.is_available()
logger.info(f"torch.cuda.is_available: {cuda_check}")
print(f"> torch.cuda.is_available: {cuda_check}")

# Load the Llama 3 model and tokenizer named in config.py
model_id = HF_MODEL_ID

tokenizer = AutoTokenizer.from_pretrained(model_id)

# BitsAndBytes 4-bit quantization config
# device_map = {"": 0}  # alternative: pin everything to GPU 0
device_map = "auto"
compute_dtype = torch.float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,  # torch.bfloat16 also works on Ampere+ GPUs
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map=device_map,
    # attn_implementation="flash_attention_2",
    quantization_config=bnb_config,
)

# Avoid the "pad_token_id is not set" warning during generation
model.generation_config.pad_token_id = tokenizer.eos_token_id

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=50,  # caps answer length; raise for longer summaries
    return_full_text=False,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    do_sample=False,  # greedy decoding (what near-zero temperature sampling approximated)
)

llm = HuggingFacePipeline(pipeline=pipe)
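
Both app.py and local.py import from a config module that is not part of this upload. A minimal sketch of what it presumably looks like (the names come from the imports; the values, including the model ID, are illustrative assumptions):

# config.py (assumed, not included in this commit)

# Hugging Face model loaded in local.py; any Llama 3 instruct variant
# matching the prompt format in prompts.py would fit here.
HF_MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"

# Toggles LangChain's set_verbose / set_debug in local.py
LLM_VERBOSE = False

# Passed to study_buddy.launch(share=...) in app.py
SHARE_GRADIO_WITH_PUBLIC_URL = False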
prompts.py ADDED
@@ -0,0 +1,53 @@
from langchain.prompts import PromptTemplate

# Llama 3 chat format: the system turn carries the instructions and the
# user turn carries the content to act on, so {question} and
# {document_text} belong in the user turn rather than the system turn.
qa_template = (
    """
<|start_header_id|>system<|end_header_id|>
You are an AI assistant specialized in answering academic queries with accuracy and clarity.

### Instructions
- Read the user's question carefully.
- Provide a concise and informative answer based on available knowledge.
- If the question is unclear, ask for clarification.
- If you do not know the answer, respond with "I'm not sure."
<|eot_id|><|start_header_id|>user<|end_header_id|>

Question: {question}

Please provide a detailed answer.
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
Answer:
""")

summarization_template = (
    """
<|start_header_id|>system<|end_header_id|>
You are a highly skilled AI that specializes in summarizing academic content.

### Instructions
- Read the provided text carefully.
- Extract the most important key points while maintaining clarity.
- Summarize in a way that retains the core meaning but is concise.
- Output only the summary without additional explanations.
<|eot_id|><|start_header_id|>user<|end_header_id|>

### Text to Summarize
{document_text}

Task: Provide a concise summary.
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
Summary:
""")

QA_PROMPT = PromptTemplate(
    input_variables=["question"],
    template=qa_template
)

SUMMARIZATION_PROMPT = PromptTemplate(
    input_variables=["document_text"],
    template=summarization_template
)
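
To inspect exactly what the model will receive, the templates can be rendered directly with PromptTemplate.format (standard LangChain usage; the inputs are just examples):

if __name__ == "__main__":
    # .format fills the input variables and returns the final prompt
    # string, Llama 3 special tokens included.
    print(QA_PROMPT.format(question="Explain Newton's laws."))
    print(SUMMARIZATION_PROMPT.format(document_text="Cells are the basic unit of life."))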