Aswin Candra committed
Commit 421bbee · 1 Parent(s): e4edb0b

initial commit

Files changed (3)
  1. .gitignore +1 -0
  2. app.py +208 -0
  3. requirements.txt +8 -0
.gitignore ADDED
@@ -0,0 +1 @@
+ .env
app.py ADDED
@@ -0,0 +1,208 @@
+ import os
+ from dotenv import load_dotenv, find_dotenv
+ from serpapi import GoogleSearch
+ import json
+ import gradio as gr
+ import openai
+ from langchain.chat_models import ChatOpenAI
+ from langchain.document_loaders import UnstructuredURLLoader
+ from langchain.docstore.document import Document
+ from langchain.chains import SequentialChain
+ from langchain.chains.llm import LLMChain
+ from langchain.prompts import PromptTemplate
+ from langchain.chains.summarize import load_summarize_chain
+ from unstructured.cleaners.core import remove_punctuation, clean, clean_extra_whitespace
+
+ _ = load_dotenv(find_dotenv())  # read the local .env file (SERP_API_KEY, OPENAI_API_KEY)
+
+ # ============== UTILITY FUNCTIONS ==============
+
+ def generate_trend(date_str: str):
+     "Fetch the (at most 10) daily trending Google searches for Indonesia on the given yyyymmdd date"
+     SERP_API_KEY = os.environ['SERP_API_KEY']
+
+     params = {
+         'api_key': SERP_API_KEY,
+         'engine': 'google_trends_trending_now',
+         'hl': 'id',
+         'geo': 'ID',
+         'date': date_str,
+         'frequency': 'daily'
+     }
+
+     search = GoogleSearch(params)
+     results = search.get_dict()
+     if len(results['daily_searches'][0]['searches']) > 10:
+         res = results['daily_searches'][0]['searches'][:10]
+     else:
+         res = results['daily_searches'][0]['searches']
+
+     trends = [item['query'] for item in res]
+
+     return trends, res
+
+ def fetch_article_urls(res_dict, selected_topic: str):
+     "Return the news article links attached to the selected trending query"
+     for item in res_dict:
+         if item.get('query') == selected_topic:
+             article_urls = [article['link'] for article in item['articles']]
+             return article_urls
+
+     # if the selected topic is not found
+     return []
+
+ def extract_article(url):
+     "Given a URL, return a LangChain Document for further processing"
+     loader = UnstructuredURLLoader(
+         urls=[url], mode="elements",
+         post_processors=[clean, remove_punctuation, clean_extra_whitespace]
+     )
+     elements = loader.load()
+     selected_elements = [e for e in elements if e.metadata['category'] == "NarrativeText"]
+     full_clean = " ".join([e.page_content for e in selected_elements])
+     return Document(page_content=full_clean, metadata={"source": url})
+
+ # ============== UTILITY FUNCTIONS ==============
+
+
+ # ============== GRADIO FUNCTIONS ==============
+
+ def dropdown_trend(year_txt, month_txt, date_txt):
+     "Populate the trend dropdown from the yyyy/mm/dd textboxes and keep the raw SerpApi payload"
+     date_str = year_txt + month_txt + date_txt
+     trends, res = generate_trend(date_str)
+     return gr.Dropdown.update(choices=trends), res
+
+ def generate(topic, trends_dic):
+     "Summarize the articles behind the selected trend, translate the summary, and draft three tweet threads"
+     article_urls = fetch_article_urls(trends_dic, topic)
+     article_docs = [extract_article(url) for url in article_urls]
+
+     openai.api_key = os.environ['OPENAI_API_KEY']
+
+     OpenAIModel = "gpt-3.5-turbo"
+     llm = ChatOpenAI(model=OpenAIModel, temperature=0.1)
+
+     # Step 1: summarize the fetched articles with a refine-style chain.
+     summarize_prompt_template = """Write a concise summary of the following Indonesian articles:
+ {text}
+
+ CONCISE SUMMARY:
+ """
+
+     prompt = PromptTemplate.from_template(summarize_prompt_template)
+
+     refine_template = (
+         "Your job is to produce a final summary\n"
+         "We have provided an existing summary up to a certain point: {existing_answer}\n"
+         "We have the opportunity to refine the existing summary"
+         "(only if needed) with some more context below.\n"
+         "------------\n"
+         "{text}\n"
+         "------------\n"
+         "If the context isn't useful, return the original summary."
+     )
+     refine_prompt = PromptTemplate.from_template(refine_template)
+
+     summarize_chain = load_summarize_chain(
+         llm=llm,
+         chain_type="refine",
+         question_prompt=prompt,
+         refine_prompt=refine_prompt,
+         return_intermediate_steps=True,
+         input_key="input_documents",
+         output_key="summarize_output",
+         verbose=False
+     )
+
+     # Step 2: translate the English summary to Indonesian.
+     translate_prompt_template = """Translate the following text to Indonesian:
+ {summarize_output}
+ """
+
+     translate_prompt = PromptTemplate.from_template(translate_prompt_template)
+
+     translate_chain = LLMChain(
+         llm=llm,
+         prompt=translate_prompt,
+         output_key="translated_summary",
+         verbose=True
+     )
+
+     # Step 3: draft three tweet-thread versions with a more creative temperature.
+     llm_2 = ChatOpenAI(model=OpenAIModel, temperature=0.8)
+
+     tweet_prompt_template = """Generate a list of three varied versions of Twitter post sequences. Each version has 3 to 10 coherent threads. \
+ The topic of the post is as follows:
+ {translated_summary}
+
+ You are required to write it in Indonesian. Keep it fun to read by adding some emojis and supporting hashtags (just if you think it's necessary).
+
+ Output it as an array of 3 JSON items with the following keys:
+ - version: <version 1/2/3>,
+ - tweet: <the tweet, each thread separated by the number of the sequence and a newline char>
+ """
+
+     tweet_prompt = PromptTemplate.from_template(tweet_prompt_template)
+
+     tweet_chain = LLMChain(
+         llm=llm_2,
+         prompt=tweet_prompt,
+         output_key="output_text",
+         verbose=True
+     )
+
+     # Run summarize -> translate -> tweet generation as one sequential chain.
+     sequentialChain = SequentialChain(
+         chains=[summarize_chain, translate_chain, tweet_chain],
+         input_variables=["input_documents"],
+         output_variables=["translated_summary", "output_text"],
+         verbose=True
+     )
+
+     res = sequentialChain({"input_documents": article_docs})
+
+     summary = [res['translated_summary']]
+     generated_res = json.loads(res['output_text'])
+
+     tweets = [item.get('tweet') for item in generated_res]
+
+     return summary + tweets
+
+ # ============== GRADIO FUNCTIONS ==============
+
+ options = ['Do the Browse Trend first']
+ with gr.Blocks() as demo:
+     gr.Markdown(
+         """
+         # 📰🖥 Trending News Article-based Tweet (𝕏) Generator
+         Don't know what's currently trending? No resources to do the research? But you want to gain traffic on your Twitter, a.k.a. 𝕏? This is the perfect solution for you!
+
+         With a single click, you will get the top 10 most-searched topics on Google Search for a specific date. Select one of them and we'll fetch some articles related to your selected topic.
+
+         Finally, voilà! You get three tweet drafts that you can simply copy-paste to your Twitter/𝕏!
+
+         Psst, for now it takes around **~2 minutes** from fetching the articles related to the selected topic until the tweet drafts are generated. We'll improve it soon!
+         """
+     )
+     with gr.Row():
+         with gr.Column(scale=1):
+             with gr.Row():
+                 year_txt = gr.Textbox(label="year (yyyy)")
+                 month_txt = gr.Textbox(label="month (mm)")
+                 date_txt = gr.Textbox(label="date (dd)")
+
+             btn_fetch_trend = gr.Button("1. Browse Trend")
+             trend_options = gr.Dropdown(options, label="Top 10 trends")
+             trend_res = gr.JSON(visible=False)  # hidden holder for the raw SerpApi payload
+             generate_btn = gr.Button("2. Generate now!", variant='primary')
+         with gr.Column(scale=1):
+             trend_summary = gr.Textbox(label='Trend Summary')
+             with gr.Tab("Draft 1"):
+                 ver_1 = gr.Textbox(lines=10, show_copy_button=True, show_label=False)
+             with gr.Tab("Draft 2"):
+                 ver_2 = gr.Textbox(lines=10, show_copy_button=True, show_label=False)
+             with gr.Tab("Draft 3"):
+                 ver_3 = gr.Textbox(lines=10, show_copy_button=True, show_label=False)
+
+
+     btn_fetch_trend.click(dropdown_trend, inputs=[year_txt, month_txt, date_txt], outputs=[trend_options, trend_res])
+     generate_btn.click(generate, inputs=[trend_options, trend_res], outputs=[trend_summary, ver_1, ver_2, ver_3])
+
+ demo.launch(debug=True)
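
Note: the trend-fetching step can be sanity-checked on its own before launching the UI. The snippet below is a minimal sketch, not part of the commit; it mirrors the SerpApi parameters used in generate_trend() above, assumes SERP_API_KEY is set in the local .env, and uses an arbitrary placeholder date (20231020, in the yyyymmdd form assembled by dropdown_trend()).

import os
from serpapi import GoogleSearch  # provided by google_search_results==2.4.2

# Fetch one day's trending searches for Indonesia the same way generate_trend() does.
params = {
    'api_key': os.environ['SERP_API_KEY'],
    'engine': 'google_trends_trending_now',
    'hl': 'id',
    'geo': 'ID',
    'date': '20231020',   # placeholder yyyymmdd date
    'frequency': 'daily',
}
results = GoogleSearch(params).get_dict()

# Print each trending query with up to three of its attached article links.
for item in results['daily_searches'][0]['searches'][:10]:
    print(item['query'], '->', [a['link'] for a in item.get('articles', [])][:3])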
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ google_search_results==2.4.2
+ gradio==3.48.0
+ langchain==0.0.316
+ openai==0.27.6
+ python-dotenv==1.0.0
+ unstructured==0.10.24
+ python-magic==0.4.27
+ python-magic-bin==0.4.14