Anustup committed on
Commit
fb200df
·
verified ·
1 Parent(s): 0bb26da

Update summarizer.py

Browse files
Files changed (1) hide show
  1. summarizer.py +36 -36
summarizer.py CHANGED
@@ -1,36 +1,36 @@
1
- import os
2
- import base64
3
- from langchain.docstore.document import Document
4
- from langchain.text_splitter import CharacterTextSplitter
5
- from langchain.llms.openai import OpenAI
6
- from langchain.chains.summarize import load_summarize_chain
7
- from langchain.document_loaders import UnstructuredURLLoader
8
- import nltk
9
- import openai
10
-
11
- nltk.download('punkt')
12
- OPENAI_API_KEY = "sk-proj-uCiflA45fuchFdjkbNJ7T3BlbkFJF5WiEf2zHkttr7s9kijX"
13
-
14
-
15
- def create_brand_html(brand_link):
16
- urls = [brand_link]
17
- loader = UnstructuredURLLoader(urls=urls)
18
- data = loader.load()
19
- chunk_size = 3000
20
- chunk_overlap = 200
21
- text_splitter = CharacterTextSplitter(
22
- chunk_size=chunk_size,
23
- chunk_overlap=chunk_overlap,
24
- length_function=len,
25
- )
26
- texts = text_splitter.split_text(data[0].page_content)
27
- docs = [Document(page_content=t) for t in texts[:]]
28
- return docs
29
-
30
-
31
- def create_langchain_openai_query(docs):
32
- openai.api_key = OPENAI_API_KEY
33
- llm = OpenAI(temperature=0, openai_api_key=openai.api_key)
34
- map_reduce_chain = load_summarize_chain(llm, chain_type="map_reduce")
35
- output = map_reduce_chain.run(docs)
36
- return output
 
1
+ import os
2
+ import base64
3
+ from langchain.docstore.document import Document
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from langchain.llms.openai import OpenAI
6
+ from langchain.chains.summarize import load_summarize_chain
7
+ from langchain.document_loaders import UnstructuredURLLoader
8
+ import nltk
9
+ import openai
10
+
11
+ nltk.download('punkt')
12
+ OPENAI_API_KEY = ""
13
+
14
+
15
def create_brand_html(brand_link):
    """Load a web page and split its text into LangChain documents.

    The URL is loaded with ``UnstructuredURLLoader``; the ``page_content`` of
    the first loaded document is split with ``CharacterTextSplitter`` using
    ``chunk_size=3000`` and ``chunk_overlap=200``, with lengths measured by
    ``len``.

    Args:
        brand_link: URL of the page to load.

    Returns:
        A list of ``Document`` objects, one per text chunk. The list is empty
        when the loader returns no documents for the URL.
    """
    chunk_size = 3000
    chunk_overlap = 200

    loaded = UnstructuredURLLoader(urls=[brand_link]).load()
    if not loaded:
        # Without this guard, loaded[0] raises IndexError when nothing was
        # loaded (presumably when the fetch fails and the loader skips the
        # URL -- confirm against the loader's failure settings).
        return []

    text_splitter = CharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    chunks = text_splitter.split_text(loaded[0].page_content)
    return [Document(page_content=chunk) for chunk in chunks]
29
+
30
+
31
def create_langchain_openai_query(docs):
    """Summarize documents with a LangChain map-reduce summarize chain.

    Assigns the module-level ``OPENAI_API_KEY`` to ``openai.api_key``, builds
    an ``OpenAI`` LLM with ``temperature=0`` using that key, and runs a
    ``map_reduce`` summarize chain over ``docs``.

    Args:
        docs: Documents to pass to the chain's ``run`` method.

    Returns:
        Whatever the chain's ``run`` call returns for ``docs``.
    """
    # The key is stored on the openai module first and then read back from it,
    # so the module-level setting and the LLM use the same value.
    openai.api_key = OPENAI_API_KEY
    summarizer = load_summarize_chain(
        OpenAI(temperature=0, openai_api_key=openai.api_key),
        chain_type="map_reduce",
    )
    return summarizer.run(docs)