Francesco-A committed on
Commit
64ecac0
·
1 Parent(s): c3a18e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -20
app.py CHANGED
@@ -21,25 +21,25 @@ import dotenv
21
  from dotenv import load_dotenv
22
  load_dotenv()
23
 
24
- # Ensure the API token is set
25
- huggingfacehub_api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
26
- if not huggingfacehub_api_token:
27
- raise ValueError("Please set the HUGGINGFACEHUB_API_TOKEN environment variable.")
28
-
29
- # %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/Langchain_apps/langchain_summarization_app.ipynb 5
30
- hub_llm = HuggingFaceHub(
31
- repo_id="facebook/bart-large-cnn", # facebook/bart-large-cnn or "google/flan-t5-base" or "google/pegasus-xsum"
32
- model_kwargs={
33
- "temperature": 0.01, # Controls randomness (0.0: deterministic, 1.0: very random)
34
- "max_new_tokens": 256*2, # Maximum number of tokens to generate in the summary
35
- "min_length": 30, # Minimum length of the generated summary
36
- "repetition_penalty": 1.2, # Penalizes repeated tokens (higher value = less repetition)
37
- "top_k": 50, # Consider only the top k most likely tokens when generating
38
- "top_p": 0.95, # Consider tokens with cumulative probability up to top_p
39
- "early_stopping": True, # Stops generation when a certain condition is met (e.g., end-of-sequence token)
40
- "huggingfacehub_api_token": huggingfacehub_api_token
41
- }
42
- )
43
 
44
  # %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/Langchain_apps/langchain_summarization_app.ipynb 15
45
  from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -78,8 +78,33 @@ prompt_example_2 = """Summarize the following document focusing on the key findi
78
 
79
  Summary:"""
80
 
81
- # Implementation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  def summarize(pdf_file, custom_prompt, custom_chunk, chunk_size, chunk_overlap):
 
 
 
 
 
 
 
 
 
 
 
83
  try:
84
  loader = PyPDFLoader(pdf_file.name)
85
  if custom_chunk:
 
21
  from dotenv import load_dotenv
22
  load_dotenv()
23
 
24
+ # # Ensure the API token is set
25
+ # huggingfacehub_api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
26
+ # if not huggingfacehub_api_token:
27
+ # raise ValueError("Please set the HUGGINGFACEHUB_API_TOKEN environment variable.")
28
+
29
+ # # %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/Langchain_apps/langchain_summarization_app.ipynb 5
30
+ # hub_llm = HuggingFaceHub(
31
+ # repo_id="facebook/bart-large-cnn", # facebook/bart-large-cnn or "google/flan-t5-base" or "google/pegasus-xsum"
32
+ # model_kwargs={
33
+ # "temperature": 0.01, # Controls randomness (0.0: deterministic, 1.0: very random)
34
+ # "max_new_tokens": 256*2, # Maximum number of tokens to generate in the summary
35
+ # "min_length": 30, # Minimum length of the generated summary
36
+ # "repetition_penalty": 1.2, # Penalizes repeated tokens (higher value = less repetition)
37
+ # "top_k": 50, # Consider only the top k most likely tokens when generating
38
+ # "top_p": 0.95, # Consider tokens with cumulative probability up to top_p
39
+ # "early_stopping": True, # Stops generation when a certain condition is met (e.g., end-of-sequence token)
40
+ # "huggingfacehub_api_token": huggingfacehub_api_token
41
+ # }
42
+ # )
43
 
44
  # %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/Langchain_apps/langchain_summarization_app.ipynb 15
45
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
78
 
79
  Summary:"""
80
 
81
# Initialize the shared HuggingFaceHub LLM once at module load.
# NOTE(review): the API token is passed via the ``huggingfacehub_api_token``
# CONSTRUCTOR argument (read from the environment populated by load_dotenv()
# earlier in this file), NOT inside ``model_kwargs`` — model_kwargs are
# forwarded to the inference endpoint as generation parameters, so a token
# placed there would be sent to the model API as a bogus sampling parameter
# instead of authenticating the client. If the variable is unset this passes
# None and HuggingFaceHub falls back to its own environment lookup.
hub_llm = HuggingFaceHub(
    # Alternatives tried: "google/flan-t5-base", "google/pegasus-xsum"
    repo_id="facebook/bart-large-cnn",
    huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
    model_kwargs={
        "temperature": 0.01,        # near-deterministic sampling
        "max_new_tokens": 256 * 2,  # upper bound on generated summary length
        "min_length": 30,           # lower bound on generated summary length
        "repetition_penalty": 1.2,  # >1 discourages repeated tokens
        "top_k": 50,                # sample only from the 50 most likely tokens
        "top_p": 0.95,              # nucleus sampling cutoff
        "early_stopping": True,     # stop at end-of-sequence condition
    },
)
95
+
96
  def summarize(pdf_file, custom_prompt, custom_chunk, chunk_size, chunk_overlap):
97
+ try:
98
+ # Load .env file and get the token
99
+ load_dotenv()
100
+ huggingfacehub_api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
101
+
102
+ if not huggingfacehub_api_token:
103
+ raise ValueError("HUGGINGFACEHUB_API_TOKEN not found in .env")
104
+
105
+ # Set the token for hub_llm within the function
106
+ hub_llm.model_kwargs["huggingfacehub_api_token"] = huggingfacehub_api_token
107
+
108
  try:
109
  loader = PyPDFLoader(pdf_file.name)
110
  if custom_chunk: