File size: 7,570 Bytes
421bbee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1965ec1
421bbee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1965ec1
421bbee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2e562ea
421bbee
2e562ea
421bbee
2e562ea
421bbee
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
import os
from dotenv import load_dotenv, find_dotenv
from serpapi import GoogleSearch
import json
import gradio as gr
import openai
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredURLLoader
from langchain.docstore.document import Document
from langchain.chains import SequentialChain
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from unstructured.cleaners.core import remove_punctuation,clean,clean_extra_whitespace

_ = load_dotenv(find_dotenv()) # read local .env file

# ============== UTILITY FUNCTIONS ============== 

def generate_trend(date_str: str):
    """Fetch the top (at most 10) daily trending Google searches for Indonesia.

    Args:
        date_str: Target date as a ``yyyymmdd`` string (e.g. ``"20230915"``).

    Returns:
        A tuple ``(trends, res)`` where ``trends`` is a list of trending query
        strings and ``res`` is the corresponding list of raw SerpAPI result
        dicts (each containing the ``query`` and its related ``articles``).

    Raises:
        KeyError: If ``SERP_API_KEY`` is not set in the environment, or the
            SerpAPI response lacks the expected ``daily_searches`` structure.
    """
    SERP_API_KEY = os.environ['SERP_API_KEY']

    params = {
        'api_key': SERP_API_KEY,
        'engine': 'google_trends_trending_now',
        'hl': 'id',
        'geo': 'ID',
        'date': date_str,
        'frequency': 'daily'
    }

    search = GoogleSearch(params)
    results = search.get_dict()
    # Slicing caps the list at 10 items and is a no-op when fewer exist,
    # so no explicit length check is needed.
    res = results['daily_searches'][0]['searches'][:10]

    trends = [item['query'] for item in res]
    return trends, res

def fetch_article_urls(res_dict, selected_topic: str):
    """Return the article links for *selected_topic* from the trend results.

    Args:
        res_dict: Iterable of trend dicts, each with ``query`` and ``articles``.
        selected_topic: The trend query string to look up.

    Returns:
        List of article URLs for the matching topic, or ``[]`` when the
        selected topic is not found.
    """
    match = next(
        (item for item in res_dict if item.get('query') == selected_topic),
        None,
    )
    if match is None:
        return []
    return [article['link'] for article in match['articles']]

def extract_article(url):
    """Given a URL, return a langchain Document for further processing.

    Loads the page with UnstructuredURLLoader in element mode, keeps only
    narrative-text elements, and joins their cleaned contents into a single
    Document tagged with the source URL.

    Args:
        url: The article URL to fetch and clean.

    Returns:
        A ``Document`` whose ``page_content`` is the cleaned narrative text
        and whose metadata records the source URL.
    """
    loader = UnstructuredURLLoader(
        urls=[url], mode="elements",
        post_processors=[clean, remove_punctuation, clean_extra_whitespace]
    )
    elements = loader.load()
    # Use .get() so elements without a 'category' metadata entry are
    # skipped instead of raising KeyError.
    selected_elements = [e for e in elements if e.metadata.get('category') == "NarrativeText"]
    full_clean = " ".join([e.page_content for e in selected_elements])
    return Document(page_content=full_clean, metadata={"source": url})

# ============== UTILITY FUNCTIONS ============== 


# ============== GRADIO FUNCTIONS ============== 

def dropdown_trend(year_txt, month_txt, date_txt):
    """Gradio callback: fetch trends for a date and refresh the dropdown.

    Args:
        year_txt: Year as a ``yyyy`` string.
        month_txt: Month as an ``mm`` string.
        date_txt: Day as a ``dd`` string.

    Returns:
        A tuple of (dropdown update populated with the trend queries,
        raw SerpAPI results to stash in the hidden JSON component).
    """
    # Fixed inconsistent 3-space indentation to the file's 4-space standard.
    date_str = year_txt + month_txt + date_txt
    trends, res = generate_trend(date_str)
    return gr.Dropdown.update(choices=trends), res

def generate(topic, trends_dic):
    """Gradio callback: turn a selected trend topic into tweet drafts.

    Pipeline: fetch article URLs for the topic, extract/clean each article,
    summarize them (refine chain), translate the summary to Indonesian, and
    generate three tweet-thread drafts from the translated summary.

    Args:
        topic: The trend query string selected in the dropdown.
        trends_dic: Raw SerpAPI trend results previously stored by
            ``dropdown_trend`` (list of dicts with ``query``/``articles``).

    Returns:
        A list of four strings: the translated summary (with source URLs
        appended) followed by the three generated tweet drafts — one value
        per Gradio output component.

    Raises:
        json.JSONDecodeError: If the LLM's tweet output is not valid JSON.
    """
    article_urls = fetch_article_urls(trends_dic, topic)
    article_url_str = "\n- ".join(article_urls)
    article_docs = [extract_article(url) for url in article_urls]

    openai.api_key  = os.environ['OPENAI_API_KEY']

    OpenAIModel = "gpt-3.5-turbo"
    # Low temperature for faithful summarization/translation.
    llm = ChatOpenAI(model=OpenAIModel, temperature=0.1)

    summarize_prompt_template = """Write a concise summary of the following Indonesian articles:
    {text}

    CONCISE SUMMARY:
    """

    prompt = PromptTemplate.from_template(summarize_prompt_template)

    refine_template = (
        "Your job is to produce a final summary\n"
        "We have provided an existing summary up to a certain point: {existing_answer}\n"
        "We have the opportunity to refine the existing summary"
        "(only if needed) with some more context below.\n"
        "------------\n"
        "{text}\n"
        "------------\n"
        "If the context isn't useful, return the original summary."
    )
    refine_prompt = PromptTemplate.from_template(refine_template)

    # "refine" iterates over the documents, refining one running summary.
    summarize_chain = load_summarize_chain(
        llm=llm,
        chain_type="refine",
        question_prompt=prompt,
        refine_prompt=refine_prompt,
        return_intermediate_steps=True,
        input_key="input_documents",
        output_key="summarize_output",
        verbose=False
    )

    translate_prompt_template = """Translate this following text to Indonesian:
    {summarize_output}
    """

    translate_prompt = PromptTemplate.from_template(translate_prompt_template)

    translate_chain = LLMChain(
        llm=llm,
        prompt=translate_prompt,
        output_key="translated_summary",
        verbose=True
    )

    # Higher temperature for more creative tweet drafts.
    llm_2 = ChatOpenAI(model=OpenAIModel, temperature=0.8)

    tweet_prompt_template = """Generate a list of three varied versions of Twitter post sequences. Each version has 3 to 10 coherent threads. \
    The topic of the post is as follows: 
    {translated_summary}

    You are required to write it in Indonesian. Keep it fun to read by adding some emojis and supporting hashtags (just if you think it's necessary).

    Output it as an array with 3 JSON items format with the following keys: 
    - version: <version 1/2/3>,
    - tweet: <the tweet, each thread separated by the number of the sequence and new line char>
    """

    tweet_prompt = PromptTemplate.from_template(tweet_prompt_template)

    tweet_chain = LLMChain(
        llm=llm_2,
        prompt = tweet_prompt,
        output_key="output_text",
        verbose=True
    )

    # summarize -> translate -> tweet; each chain's output_key feeds the next.
    sequentialChain = SequentialChain(
        chains=[summarize_chain, translate_chain, tweet_chain],
        input_variables=["input_documents"],
        output_variables=["translated_summary", "output_text"],
        verbose=True
    )

    res = sequentialChain({"input_documents": article_docs})

    summary = [res['translated_summary'] + '\n\nSources:\n' + article_url_str]
    # The LLM is instructed to emit a JSON array of {version, tweet} objects.
    generated_res = json.loads(res['output_text'])

    # Renamed loop variable (was `res`, shadowing the chain result above).
    tweets = [draft.get('tweet') for draft in generated_res]

    return summary + tweets

# ============== GRADIO FUNCTIONS ============== 

# Placeholder dropdown entry shown until the user browses trends.
options = ['Do the Browse Trend first']
with gr.Blocks() as demo:
    gr.Markdown(
    """
    # πŸ“°πŸ–₯ Trending News Article-based Tweet (𝕏) Generator
    Don't know a current trend? Have no resources to do a research? But you wanna gain a traffic to your Twitter a.k.a 𝕏? This is a perfect solution for you!

    With a single click, you will get the top 10 most-searched topic in Google Search on specific date. Select one of them, we'll fetch some articles related to your selected topic. 

    Finally, foala! You get three drafts of tweet that you can simply copy-paste to your Twitter/𝕏! 

    Psst, for now it will take around **~2 minutes** from fetching several articles related to selected topic until we generate the tweet drafts. We'll improve it soon!
    """
    )
    with gr.Row():
        # Left column: date inputs and the two action buttons.
        with gr.Column(scale=1):
            with gr.Row():
                year_txt = gr.Textbox(label="year (yyyy)")
                month_txt = gr.Textbox(label="month (mm)")
                date_txt = gr.Textbox(label="date (dd)")
            
            btn_fetch_trend = gr.Button("1. Browse Trend")
            trend_options = gr.Dropdown(options, label="Top 10 trends")
            # Hidden state holding the raw SerpAPI results between the two steps.
            trend_res = gr.JSON(visible=False)
            generate_btn = gr.Button("2. Generate now!", variant='primary')
        # Right column: summary plus the three generated tweet drafts.
        with gr.Column(scale=1):
            trend_summary = gr.Textbox(label='Trend Summary')
            with gr.Tab("Draft 1"):
                ver_1 = gr.Textbox(lines=10, show_copy_button=True)
            with gr.Tab("Draft 2"):
                ver_2 = gr.Textbox(lines=10, show_copy_button=True)
            with gr.Tab("Draft 3"):
                ver_3 = gr.Textbox(lines=10, show_copy_button=True)
          

    # Step 1: populate the dropdown and stash raw results in the hidden JSON.
    btn_fetch_trend.click(dropdown_trend, inputs=[year_txt, month_txt, date_txt], outputs=[trend_options, trend_res])
    # Step 2: generate returns [summary, draft1, draft2, draft3] mapping onto these outputs.
    generate_btn.click(generate, inputs=[trend_options, trend_res], outputs=[trend_summary, ver_1, ver_2, ver_3])

demo.launch(debug=True)