Spaces:

Francesco-A
/

LangchainSummarization-v1

Sleeping

App Files Community

Francesco-A committed on Dec 8, 2024

Commit

64ecac0

1 Parent(s): c3a18e3

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -20

app.py CHANGED Viewed

@@ -21,25 +21,25 @@ import dotenv
 from dotenv import load_dotenv
 load_dotenv()
-# Ensure the API token is set
-huggingfacehub_api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
-if not huggingfacehub_api_token:
-  raise ValueError("Please set the HUGGINGFACEHUB_API_TOKEN environment variable.")
-# %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/Langchain_apps/langchain_summarization_app.ipynb 5
-hub_llm = HuggingFaceHub(
-    repo_id="facebook/bart-large-cnn", # facebook/bart-large-cnn or "google/flan-t5-base" or "google/pegasus-xsum"
-    model_kwargs={
-        "temperature": 0.01, # Controls randomness (0.0: deterministic, 1.0: very random)
-        "max_new_tokens": 256*2,  # Maximum number of tokens to generate in the summary
-        "min_length": 30,  # Minimum length of the generated summary
-        "repetition_penalty": 1.2,  # Penalizes repeated tokens (higher value = less repetition)
-        "top_k": 50,  # Consider only the top k most likely tokens when generating
-        "top_p": 0.95,  # Consider tokens with cumulative probability up to top_p
-        "early_stopping": True, # Stops generation when a certain condition is met (e.g., end-of-sequence token)
-        "huggingfacehub_api_token": huggingfacehub_api_token
-    }
-)
 # %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/Langchain_apps/langchain_summarization_app.ipynb 15
 from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -78,8 +78,33 @@ prompt_example_2 = """Summarize the following document focusing on the key findi
 Summary:"""
-# Implementation
 def summarize(pdf_file, custom_prompt, custom_chunk, chunk_size, chunk_overlap):
     try:
         loader = PyPDFLoader(pdf_file.name)
         if custom_chunk:

 from dotenv import load_dotenv
 load_dotenv()
+# # Ensure the API token is set
+# huggingfacehub_api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
+# if not huggingfacehub_api_token:
+#   raise ValueError("Please set the HUGGINGFACEHUB_API_TOKEN environment variable.")
+# # %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/Langchain_apps/langchain_summarization_app.ipynb 5
+# hub_llm = HuggingFaceHub(
+#     repo_id="facebook/bart-large-cnn", # facebook/bart-large-cnn or "google/flan-t5-base" or "google/pegasus-xsum"
+#     model_kwargs={
+#         "temperature": 0.01, # Controls randomness (0.0: deterministic, 1.0: very random)
+#         "max_new_tokens": 256*2,  # Maximum number of tokens to generate in the summary
+#         "min_length": 30,  # Minimum length of the generated summary
+#         "repetition_penalty": 1.2,  # Penalizes repeated tokens (higher value = less repetition)
+#         "top_k": 50,  # Consider only the top k most likely tokens when generating
+#         "top_p": 0.95,  # Consider tokens with cumulative probability up to top_p
+#         "early_stopping": True, # Stops generation when a certain condition is met (e.g., end-of-sequence token)
+#         "huggingfacehub_api_token": huggingfacehub_api_token
+#     }
+# )
 # %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/Langchain_apps/langchain_summarization_app.ipynb 15
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 Summary:"""
+# Initialize hub_llm outside the function, but without the token yet
+hub_llm = HuggingFaceHub(
+    repo_id="facebook/bart-large-cnn",
+    model_kwargs={
+        "temperature": 0.01,
+        "max_new_tokens": 256 * 2,
+        "min_length": 30,
+        "repetition_penalty": 1.2,
+        "top_k": 50,
+        "top_p": 0.95,
+        "early_stopping": True,
+        # "huggingfacehub_api_token": huggingfacehub_api_token  # Add token later
+    }
+)
 def summarize(pdf_file, custom_prompt, custom_chunk, chunk_size, chunk_overlap):
+    try:
+        # Load .env file and get the token
+        load_dotenv()
+        huggingfacehub_api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
+        if not huggingfacehub_api_token:
+            raise ValueError("HUGGINGFACEHUB_API_TOKEN not found in .env")
+        # Set the token for hub_llm within the function
+        hub_llm.model_kwargs["huggingfacehub_api_token"] = huggingfacehub_api_token
     try:
         loader = PyPDFLoader(pdf_file.name)
         if custom_chunk: