Spaces:

Francesco-A
/

LangchainSummarization-v1

Sleeping

App Files Community

Francesco-A committed on Dec 15, 2024

Commit

3f1d535

1 Parent(s): 6346352

secrets_fix

Browse files

Files changed (1) hide show

app.py +28 -62

app.py CHANGED Viewed

@@ -21,25 +21,19 @@ import dotenv
 from dotenv import load_dotenv
 load_dotenv()
-# # Ensure the API token is set
-# huggingfacehub_api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
-# if not huggingfacehub_api_token:
-#   raise ValueError("Please set the HUGGINGFACEHUB_API_TOKEN environment variable.")
-# # %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/Langchain_apps/langchain_summarization_app.ipynb 5
-# hub_llm = HuggingFaceHub(
-#     repo_id="facebook/bart-large-cnn", # facebook/bart-large-cnn or "google/flan-t5-base" or "google/pegasus-xsum"
-#     model_kwargs={
-#         "temperature": 0.01, # Controls randomness (0.0: deterministic, 1.0: very random)
-#         "max_new_tokens": 256*2,  # Maximum number of tokens to generate in the summary
-#         "min_length": 30,  # Minimum length of the generated summary
-#         "repetition_penalty": 1.2,  # Penalizes repeated tokens (higher value = less repetition)
-#         "top_k": 50,  # Consider only the top k most likely tokens when generating
-#         "top_p": 0.95,  # Consider tokens with cumulative probability up to top_p
-#         "early_stopping": True, # Stops generation when a certain condition is met (e.g., end-of-sequence token)
-#         "huggingfacehub_api_token": huggingfacehub_api_token
-#     }
-# )
 # %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/Langchain_apps/langchain_summarization_app.ipynb 15
 from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -78,51 +72,23 @@ prompt_example_2 = """Summarize the following document focusing on the key findi
 Summary:"""
-# Initialize hub_llm outside the function, but without the token yet
-hub_llm = HuggingFaceHub(
-    repo_id="facebook/bart-large-cnn",
-    model_kwargs={
-        "temperature": 0.01,
-        "max_new_tokens": 256 * 2,
-        "min_length": 30,
-        "repetition_penalty": 1.2,
-        "top_k": 50,
-        "top_p": 0.95,
-        "early_stopping": True,
-        # "huggingfacehub_api_token": huggingfacehub_api_token  # Add token later
-    }
-)
 def summarize(pdf_file, custom_prompt, custom_chunk, chunk_size, chunk_overlap):
     try:
-        # Load .env file and get the token
-        load_dotenv()
-        huggingfacehub_api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
-        if not huggingfacehub_api_token:
-            raise ValueError("HUGGINGFACEHUB_API_TOKEN not found in .env")
-        # Set the token for hub_llm within the function
-        hub_llm.model_kwargs["huggingfacehub_api_token"] = huggingfacehub_api_token
-        try:
-            loader = PyPDFLoader(pdf_file.name)
-            if custom_chunk:
-                text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
-                docs = loader.load_and_split(text_splitter=text_splitter)
-            else:
-                docs = loader.load_and_split()
-            PROMPT = PromptTemplate(template=custom_prompt, input_variables=['text'])
-            chain = load_summarize_chain(hub_llm, chain_type='map_reduce', combine_prompt=PROMPT)
-            # Introduce a delay before calling the API
-            time.sleep(1)
-            summary = chain.run(docs)
-            return summary
-        except Exception as e:
-            return f"An error occurred: {e}"
     except Exception as e:
         return f"An error occurred: {e}"

 from dotenv import load_dotenv
 load_dotenv()
+# %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/Langchain_apps/langchain_summarization_app.ipynb 5
+hub_llm = HuggingFaceHub(
+    repo_id="facebook/bart-large-cnn", # facebook/bart-large-cnn or "google/flan-t5-base" or "google/pegasus-xsum"
+    model_kwargs={
+        "temperature": 0.01, # Controls randomness (0.0: deterministic, 1.0: very random)
+        "max_new_tokens": 256*2,  # Maximum number of tokens to generate in the summary
+        "min_length": 30,  # Minimum length of the generated summary
+        "repetition_penalty": 1.2,  # Penalizes repeated tokens (higher value = less repetition)
+        "top_k": 50,  # Consider only the top k most likely tokens when generating
+        "top_p": 0.95,  # Consider tokens with cumulative probability up to top_p
+        "early_stopping": True, # Stops generation when a certain condition is met (e.g., end-of-sequence token)
+    }
+)
 # %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/Langchain_apps/langchain_summarization_app.ipynb 15
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 Summary:"""
+# Implementation
 def summarize(pdf_file, custom_prompt, custom_chunk, chunk_size, chunk_overlap):
     try:
+        loader = PyPDFLoader(pdf_file.name)
+        if custom_chunk:
+            text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+            docs = loader.load_and_split(text_splitter=text_splitter)
+        else:
+            docs = loader.load_and_split()
+        PROMPT = PromptTemplate(template=custom_prompt, input_variables=['text'])
+        chain = load_summarize_chain(hub_llm, chain_type='map_reduce', combine_prompt=PROMPT)
+        # Introduce a delay before calling the API
+        time.sleep(1)
+        summary = chain.run(docs)
+        return summary
     except Exception as e:
         return f"An error occurred: {e}"