bainskarman committed on
Commit
a1fd273
·
verified ·
1 Parent(s): 5e06280

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -11
app.py CHANGED
@@ -8,7 +8,7 @@ from PyPDF2 import PdfReader
8
  token = os.environ.get("Key2") # Replace "Key2" with the name of your secret (the name is case-sensitive)
9
 
10
  # Function to query the Hugging Face API
11
- def query_huggingface_api(prompt, max_new_tokens=50, temperature=0.7, top_k=50):
12
  model_name = "HuggingFaceH4/zephyr-7b-alpha" # Replace with your preferred model
13
  api_url = f"https://api-inference.huggingface.co/models/{model_name}"
14
  headers = {"Authorization": f"Bearer {token}"}
@@ -34,13 +34,19 @@ def detect_language(text):
34
  except:
35
  return "en" # Default to English if detection fails
36
 
37
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the text of every page in *pdf_file* as a single string.

    The file is opened with ``PdfReader`` and the ``extract_text()``
    result of each page is concatenated in page order, with no separator
    inserted between pages.
    """
    reader = PdfReader(pdf_file)
    return "".join(page.extract_text() for page in reader.pages)
 
 
 
 
 
 
44
 
45
  # Default system prompts for each query translation method
46
  DEFAULT_SYSTEM_PROMPTS = {
@@ -88,7 +94,7 @@ def main():
88
 
89
  # LLM Parameters
90
  st.sidebar.header("LLM Parameters")
91
- max_new_tokens = st.sidebar.slider("Max New Tokens", 10, 100, 50)
92
  temperature = st.sidebar.slider("Temperature", 0.1, 1.0, 0.7)
93
  top_k = st.sidebar.slider("Top K", 1, 100, 50)
94
 
@@ -133,11 +139,14 @@ def main():
133
  if response:
134
  st.write("**Response:**", response)
135
 
136
- # Display PDF text if uploaded
137
  if pdf_file is not None:
138
- st.header("PDF Content")
139
- pdf_text = extract_text_from_pdf(pdf_file)
140
- st.write(pdf_text)
 
 
 
141
 
142
  if __name__ == "__main__":
143
  main()
 
8
  token = os.environ.get("Key2") # Replace "Key2" with the name of your secret (the name is case-sensitive)
9
 
10
  # Function to query the Hugging Face API
11
+ def query_huggingface_api(prompt, max_new_tokens=1000, temperature=0.7, top_k=50):
12
  model_name = "HuggingFaceH4/zephyr-7b-alpha" # Replace with your preferred model
13
  api_url = f"https://api-inference.huggingface.co/models/{model_name}"
14
  headers = {"Authorization": f"Bearer {token}"}
 
34
  except:
35
  return "en" # Default to English if detection fails
36
 
37
# Function to extract text from PDF with line and page numbers
def extract_text_from_pdf(pdf_file):
    """Break the text of *pdf_file* into per-line records.

    The file is opened with ``PdfReader``; the ``extract_text()`` output
    of each page is split on newline characters.

    Returns a list of dicts in reading order, one per line, with keys:
      - "page": 1-based page number
      - "line": 1-based line number within that page
      - "content": the text of the line
    """
    reader = PdfReader(pdf_file)
    # enumerate(..., start=1) yields the 1-based numbers directly.
    return [
        {"page": page_no, "line": line_no, "content": line_text}
        for page_no, page in enumerate(reader.pages, start=1)
        for line_no, line_text in enumerate(
            page.extract_text().split('\n'), start=1
        )
    ]
50
 
51
  # Default system prompts for each query translation method
52
  DEFAULT_SYSTEM_PROMPTS = {
 
94
 
95
  # LLM Parameters
96
  st.sidebar.header("LLM Parameters")
97
+ max_new_tokens = st.sidebar.slider("Max New Tokens", 10, 1000, 1000)
98
  temperature = st.sidebar.slider("Temperature", 0.1, 1.0, 0.7)
99
  top_k = st.sidebar.slider("Top K", 1, 100, 50)
100
 
 
139
  if response:
140
  st.write("**Response:**", response)
141
 
142
+ # Process PDF content if uploaded
143
  if pdf_file is not None:
144
+ pdf_text_data = extract_text_from_pdf(pdf_file)
145
+ if prompt:
146
+ # Search for relevant content in the PDF
147
+ for entry in pdf_text_data:
148
+ if prompt.lower() in entry["content"].lower():
149
+ st.write(f"**Page {entry['page']}, Line {entry['line']}:** {entry['content']}")
150
 
151
  if __name__ == "__main__":
152
  main()