Francesco-A committed on
Commit
3f1d535
·
1 Parent(s): 6346352

secrets_fix

Browse files
Files changed (1) hide show
  1. app.py +28 -62
app.py CHANGED
@@ -21,25 +21,19 @@ import dotenv
21
  from dotenv import load_dotenv
22
  load_dotenv()
23
 
24
- # # Ensure the API token is set
25
- # huggingfacehub_api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
26
- # if not huggingfacehub_api_token:
27
- # raise ValueError("Please set the HUGGINGFACEHUB_API_TOKEN environment variable.")
28
-
29
- # # %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/Langchain_apps/langchain_summarization_app.ipynb 5
30
- # hub_llm = HuggingFaceHub(
31
- # repo_id="facebook/bart-large-cnn", # facebook/bart-large-cnn or "google/flan-t5-base" or "google/pegasus-xsum"
32
- # model_kwargs={
33
- # "temperature": 0.01, # Controls randomness (0.0: deterministic, 1.0: very random)
34
- # "max_new_tokens": 256*2, # Maximum number of tokens to generate in the summary
35
- # "min_length": 30, # Minimum length of the generated summary
36
- # "repetition_penalty": 1.2, # Penalizes repeated tokens (higher value = less repetition)
37
- # "top_k": 50, # Consider only the top k most likely tokens when generating
38
- # "top_p": 0.95, # Consider tokens with cumulative probability up to top_p
39
- # "early_stopping": True, # Stops generation when a certain condition is met (e.g., end-of-sequence token)
40
- # "huggingfacehub_api_token": huggingfacehub_api_token
41
- # }
42
- # )
43
 
44
  # %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/Langchain_apps/langchain_summarization_app.ipynb 15
45
  from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -78,51 +72,23 @@ prompt_example_2 = """Summarize the following document focusing on the key findi
78
 
79
  Summary:"""
80
 
81
- # Initialize hub_llm outside the function, but without the token yet
82
- hub_llm = HuggingFaceHub(
83
- repo_id="facebook/bart-large-cnn",
84
- model_kwargs={
85
- "temperature": 0.01,
86
- "max_new_tokens": 256 * 2,
87
- "min_length": 30,
88
- "repetition_penalty": 1.2,
89
- "top_k": 50,
90
- "top_p": 0.95,
91
- "early_stopping": True,
92
- # "huggingfacehub_api_token": huggingfacehub_api_token # Add token later
93
- }
94
- )
95
-
96
  def summarize(pdf_file, custom_prompt, custom_chunk, chunk_size, chunk_overlap):
97
  try:
98
- # Load .env file and get the token
99
- load_dotenv()
100
- huggingfacehub_api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
101
-
102
- if not huggingfacehub_api_token:
103
- raise ValueError("HUGGINGFACEHUB_API_TOKEN not found in .env")
104
-
105
- # Set the token for hub_llm within the function
106
- hub_llm.model_kwargs["huggingfacehub_api_token"] = huggingfacehub_api_token
107
-
108
- try:
109
- loader = PyPDFLoader(pdf_file.name)
110
- if custom_chunk:
111
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
112
- docs = loader.load_and_split(text_splitter=text_splitter)
113
- else:
114
- docs = loader.load_and_split()
115
-
116
- PROMPT = PromptTemplate(template=custom_prompt, input_variables=['text'])
117
- chain = load_summarize_chain(hub_llm, chain_type='map_reduce', combine_prompt=PROMPT)
118
-
119
- # Introduce a delay before calling the API
120
- time.sleep(1)
121
- summary = chain.run(docs)
122
- return summary
123
- except Exception as e:
124
- return f"An error occurred: {e}"
125
-
126
  except Exception as e:
127
  return f"An error occurred: {e}"
128
 
 
21
  from dotenv import load_dotenv
22
  load_dotenv()
23
 
24
+ # %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/Langchain_apps/langchain_summarization_app.ipynb 5
25
+ hub_llm = HuggingFaceHub(
26
+ repo_id="facebook/bart-large-cnn", # facebook/bart-large-cnn or "google/flan-t5-base" or "google/pegasus-xsum"
27
+ model_kwargs={
28
+ "temperature": 0.01, # Controls randomness (0.0: deterministic, 1.0: very random)
29
+ "max_new_tokens": 256*2, # Maximum number of tokens to generate in the summary
30
+ "min_length": 30, # Minimum length of the generated summary
31
+ "repetition_penalty": 1.2, # Penalizes repeated tokens (higher value = less repetition)
32
+ "top_k": 50, # Consider only the top k most likely tokens when generating
33
+ "top_p": 0.95, # Consider tokens with cumulative probability up to top_p
34
+ "early_stopping": True, # Stops generation when a certain condition is met (e.g., end-of-sequence token)
35
+ }
36
+ )
 
 
 
 
 
 
37
 
38
  # %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/Langchain_apps/langchain_summarization_app.ipynb 15
39
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
72
 
73
  Summary:"""
74
 
75
+ # Implementation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  def summarize(pdf_file, custom_prompt, custom_chunk, chunk_size, chunk_overlap):
77
  try:
78
+ loader = PyPDFLoader(pdf_file.name)
79
+ if custom_chunk:
80
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
81
+ docs = loader.load_and_split(text_splitter=text_splitter)
82
+ else:
83
+ docs = loader.load_and_split()
84
+
85
+ PROMPT = PromptTemplate(template=custom_prompt, input_variables=['text'])
86
+ chain = load_summarize_chain(hub_llm, chain_type='map_reduce', combine_prompt=PROMPT)
87
+
88
+ # Introduce a delay before calling the API
89
+ time.sleep(1)
90
+ summary = chain.run(docs)
91
+ return summary
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  except Exception as e:
93
  return f"An error occurred: {e}"
94