Commit
·
64ecac0
1
Parent(s):
c3a18e3
Update app.py
Browse files
app.py
CHANGED
@@ -21,25 +21,25 @@ import dotenv
|
|
21 |
from dotenv import load_dotenv
|
22 |
load_dotenv()
|
23 |
|
24 |
-
# Ensure the API token is set
|
25 |
-
huggingfacehub_api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
|
26 |
-
if not huggingfacehub_api_token:
|
27 |
-
|
28 |
-
|
29 |
-
# %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/Langchain_apps/langchain_summarization_app.ipynb 5
|
30 |
-
hub_llm = HuggingFaceHub(
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
)
|
43 |
|
44 |
# %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/Langchain_apps/langchain_summarization_app.ipynb 15
|
45 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
@@ -78,8 +78,33 @@ prompt_example_2 = """Summarize the following document focusing on the key findi
|
|
78 |
|
79 |
Summary:"""
|
80 |
|
81 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
def summarize(pdf_file, custom_prompt, custom_chunk, chunk_size, chunk_overlap):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
try:
|
84 |
loader = PyPDFLoader(pdf_file.name)
|
85 |
if custom_chunk:
|
|
|
21 |
from dotenv import load_dotenv
|
22 |
load_dotenv()
|
23 |
|
24 |
+
# # Ensure the API token is set
|
25 |
+
# huggingfacehub_api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
|
26 |
+
# if not huggingfacehub_api_token:
|
27 |
+
# raise ValueError("Please set the HUGGINGFACEHUB_API_TOKEN environment variable.")
|
28 |
+
|
29 |
+
# # %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/Langchain_apps/langchain_summarization_app.ipynb 5
|
30 |
+
# hub_llm = HuggingFaceHub(
|
31 |
+
# repo_id="facebook/bart-large-cnn", # facebook/bart-large-cnn or "google/flan-t5-base" or "google/pegasus-xsum"
|
32 |
+
# model_kwargs={
|
33 |
+
# "temperature": 0.01, # Controls randomness (0.0: deterministic, 1.0: very random)
|
34 |
+
# "max_new_tokens": 256*2, # Maximum number of tokens to generate in the summary
|
35 |
+
# "min_length": 30, # Minimum length of the generated summary
|
36 |
+
# "repetition_penalty": 1.2, # Penalizes repeated tokens (higher value = less repetition)
|
37 |
+
# "top_k": 50, # Consider only the top k most likely tokens when generating
|
38 |
+
# "top_p": 0.95, # Consider tokens with cumulative probability up to top_p
|
39 |
+
# "early_stopping": True, # Stops generation when a certain condition is met (e.g., end-of-sequence token)
|
40 |
+
# "huggingfacehub_api_token": huggingfacehub_api_token
|
41 |
+
# }
|
42 |
+
# )
|
43 |
|
44 |
# %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/Langchain_apps/langchain_summarization_app.ipynb 15
|
45 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
78 |
|
79 |
Summary:"""
|
80 |
|
81 |
+
# Initialize hub_llm outside the function, but without the token yet
|
82 |
+
hub_llm = HuggingFaceHub(
|
83 |
+
repo_id="facebook/bart-large-cnn",
|
84 |
+
model_kwargs={
|
85 |
+
"temperature": 0.01,
|
86 |
+
"max_new_tokens": 256 * 2,
|
87 |
+
"min_length": 30,
|
88 |
+
"repetition_penalty": 1.2,
|
89 |
+
"top_k": 50,
|
90 |
+
"top_p": 0.95,
|
91 |
+
"early_stopping": True,
|
92 |
+
# "huggingfacehub_api_token": huggingfacehub_api_token # Add token later
|
93 |
+
}
|
94 |
+
)
|
95 |
+
|
96 |
def summarize(pdf_file, custom_prompt, custom_chunk, chunk_size, chunk_overlap):
|
97 |
+
try:
|
98 |
+
# Load .env file and get the token
|
99 |
+
load_dotenv()
|
100 |
+
huggingfacehub_api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
|
101 |
+
|
102 |
+
if not huggingfacehub_api_token:
|
103 |
+
raise ValueError("HUGGINGFACEHUB_API_TOKEN not found in .env")
|
104 |
+
|
105 |
+
# Set the token for hub_llm within the function
|
106 |
+
hub_llm.model_kwargs["huggingfacehub_api_token"] = huggingfacehub_api_token
|
107 |
+
|
108 |
try:
|
109 |
loader = PyPDFLoader(pdf_file.name)
|
110 |
if custom_chunk:
|