Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -8,7 +8,7 @@ from PyPDF2 import PdfReader
|
|
8 |
# API token sent as the Bearer credential to the Hugging Face Inference API.
# Environment variable names are case-sensitive on most platforms, so the
# secret must be named exactly as written in the lookup below.
token = os.environ.get("Key2") # Replace "Key2" with your secret key name
|
9 |
|
10 |
# Function to query the Hugging Face API
|
11 |
-
def query_huggingface_api(prompt, max_new_tokens=
|
12 |
model_name = "HuggingFaceH4/zephyr-7b-alpha" # Replace with your preferred model
|
13 |
api_url = f"https://api-inference.huggingface.co/models/{model_name}"
|
14 |
headers = {"Authorization": f"Bearer {token}"}
|
@@ -34,13 +34,19 @@ def detect_language(text):
|
|
34 |
except:
|
35 |
return "en" # Default to English if detection fails
|
36 |
|
37 |
-
# Function to extract text from PDF
|
38 |
def extract_text_from_pdf(pdf_file):
|
39 |
pdf_reader = PdfReader(pdf_file)
|
40 |
-
|
41 |
-
for page in pdf_reader.pages:
|
42 |
-
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
# Default system prompts for each query translation method
|
46 |
DEFAULT_SYSTEM_PROMPTS = {
|
@@ -88,7 +94,7 @@ def main():
|
|
88 |
|
89 |
# LLM Parameters
|
90 |
st.sidebar.header("LLM Parameters")
|
91 |
-
max_new_tokens = st.sidebar.slider("Max New Tokens", 10,
|
92 |
temperature = st.sidebar.slider("Temperature", 0.1, 1.0, 0.7)
|
93 |
top_k = st.sidebar.slider("Top K", 1, 100, 50)
|
94 |
|
@@ -133,11 +139,14 @@ def main():
|
|
133 |
if response:
|
134 |
st.write("**Response:**", response)
|
135 |
|
136 |
-
#
|
137 |
if pdf_file is not None:
|
138 |
-
|
139 |
-
|
140 |
-
|
|
|
|
|
|
|
141 |
|
142 |
if __name__ == "__main__":
|
143 |
main()
|
|
|
8 |
# API token sent as the Bearer credential to the Hugging Face Inference API.
# Environment variable names are case-sensitive on most platforms, so the
# secret must be named exactly as written in the lookup below.
token = os.environ.get("Key2") # Replace "Key2" with your secret key name
|
9 |
|
10 |
# Function to query the Hugging Face API
|
11 |
+
def query_huggingface_api(prompt, max_new_tokens=1000, temperature=0.7, top_k=50):
|
12 |
model_name = "HuggingFaceH4/zephyr-7b-alpha" # Replace with your preferred model
|
13 |
api_url = f"https://api-inference.huggingface.co/models/{model_name}"
|
14 |
headers = {"Authorization": f"Bearer {token}"}
|
|
|
34 |
except:
|
35 |
return "en" # Default to English if detection fails
|
36 |
|
37 |
+
# Function to extract text from PDF with line and page numbers
|
38 |
def extract_text_from_pdf(pdf_file):
    """Extract the text of a PDF as a flat list of located lines.

    Args:
        pdf_file: Anything ``PdfReader`` accepts (a path or a binary
            file-like object such as a Streamlit upload).

    Returns:
        A list of dicts, one per extracted line, in reading order. Each dict
        has the keys ``"page"`` (1-based page number), ``"line"`` (1-based
        line number within that page) and ``"content"`` (the line's text).
    """
    pdf_reader = PdfReader(pdf_file)
    text_data = []
    for page_num, page in enumerate(pdf_reader.pages, start=1):
        # Defensive: treat a missing/falsy extraction result as empty text so
        # a page without a text layer cannot crash the whole extraction.
        page_text = page.extract_text() or ""
        for line_num, line in enumerate(page_text.split("\n"), start=1):
            text_data.append({
                "page": page_num,
                "line": line_num,
                "content": line,
            })
    return text_data
|
50 |
|
51 |
# Default system prompts for each query translation method
|
52 |
DEFAULT_SYSTEM_PROMPTS = {
|
|
|
94 |
|
95 |
# LLM Parameters
|
96 |
st.sidebar.header("LLM Parameters")
|
97 |
+
max_new_tokens = st.sidebar.slider("Max New Tokens", 10, 1000, 1000)
|
98 |
temperature = st.sidebar.slider("Temperature", 0.1, 1.0, 0.7)
|
99 |
top_k = st.sidebar.slider("Top K", 1, 100, 50)
|
100 |
|
|
|
139 |
if response:
|
140 |
st.write("**Response:**", response)
|
141 |
|
142 |
+
# Process PDF content if uploaded
|
143 |
if pdf_file is not None:
|
144 |
+
pdf_text_data = extract_text_from_pdf(pdf_file)
|
145 |
+
if prompt:
|
146 |
+
# Search for relevant content in the PDF
|
147 |
+
for entry in pdf_text_data:
|
148 |
+
if prompt.lower() in entry["content"].lower():
|
149 |
+
st.write(f"**Page {entry['page']}, Line {entry['line']}:** {entry['content']}")
|
150 |
|
151 |
if __name__ == "__main__":
|
152 |
main()
|