# Trending-news tweet generator — Gradio app (Hugging Face Space).
import os | |
from dotenv import load_dotenv, find_dotenv | |
from serpapi import GoogleSearch | |
import json | |
import gradio as gr | |
import openai | |
from langchain.chat_models import ChatOpenAI | |
from langchain.document_loaders import UnstructuredURLLoader | |
from langchain.docstore.document import Document | |
from langchain.chains import SequentialChain | |
from langchain.chains.llm import LLMChain | |
from langchain.prompts import PromptTemplate | |
from langchain.chains.summarize import load_summarize_chain | |
from unstructured.cleaners.core import remove_punctuation,clean,clean_extra_whitespace | |
_ = load_dotenv(find_dotenv()) # read local .env file | |
# ============== UTILITY FUNCTIONS ============== | |
def generate_trend(date_str: str):
    """Fetch the top (at most 10) daily trending Google searches for Indonesia.

    Args:
        date_str: Date string in ``yyyymmdd`` form, as expected by the
            SerpAPI ``google_trends_trending_now`` engine.

    Returns:
        A tuple ``(trends, res)`` where ``trends`` is the list of trending
        query strings and ``res`` is the matching list of raw search dicts
        (each of which also carries the related articles).

    Raises:
        KeyError: if ``SERP_API_KEY`` is not set, or if the SerpAPI response
            does not contain ``daily_searches``.
    """
    params = {
        'api_key': os.environ['SERP_API_KEY'],
        'engine': 'google_trends_trending_now',
        'hl': 'id',   # interface language: Indonesian
        'geo': 'ID',  # region: Indonesia
        'date': date_str,
        'frequency': 'daily',
    }
    results = GoogleSearch(params).get_dict()
    # Slicing is safe even when fewer than 10 entries come back, so the
    # original explicit length check was redundant. (Also avoids reusing the
    # name `search` for both the API client and the loop variable.)
    res = results['daily_searches'][0]['searches'][:10]
    trends = [entry['query'] for entry in res]
    return trends, res
def fetch_article_urls(res_dict, selected_topic: str):
    """Return the article links attached to *selected_topic*.

    ``res_dict`` is the list of raw trend dicts produced by
    ``generate_trend``. An empty list is returned when the topic is not
    found among the trends.
    """
    matches = (
        entry['articles']
        for entry in res_dict
        if entry.get('query') == selected_topic
    )
    articles = next(matches, None)
    if articles is None:
        # Selected topic not present in the trend results.
        return []
    return [article['link'] for article in articles]
def extract_article(url):
    """Given a URL, return a langchain Document for further processing."""
    # "elements" mode yields one element per page fragment, each tagged with
    # a category in its metadata; the post-processors strip punctuation and
    # extra whitespace from the extracted text.
    loader = UnstructuredURLLoader(
        urls=[url], mode="elements",
        post_processors=[clean, remove_punctuation, clean_extra_whitespace]
    )
    elements = loader.load()
    # Keep only the body prose — drops titles, navigation text, boilerplate.
    selected_elements = [e for e in elements if e.metadata['category'] == "NarrativeText"]
    full_clean = " ".join([e.page_content for e in selected_elements])
    return Document(page_content=full_clean, metadata={"source": url})
# ============== UTILITY FUNCTIONS ============== | |
# ============== GRADIO FUNCTIONS ============== | |
def dropdown_trend(year_txt, month_txt, date_txt):
    """Build a yyyymmdd date string from the three textboxes, fetch that
    day's trends, and push them into the trends dropdown.

    Returns the dropdown update plus the raw trend dicts (stored in the
    hidden JSON state for the generation step).
    """
    combined_date = f"{year_txt}{month_txt}{date_txt}"
    top_trends, raw_results = generate_trend(combined_date)
    return gr.Dropdown.update(choices=top_trends), raw_results
def generate(topic, trends_dic):
    """Generate an Indonesian summary and three tweet drafts for a trend.

    Args:
        topic: The trend query chosen from the dropdown.
        trends_dic: Raw trend result dicts produced by ``generate_trend``
            (passed through the hidden JSON state).

    Returns:
        A 4-item list matching the Gradio outputs:
        ``[summary_with_sources, draft_1, draft_2, draft_3]``.
    """
    article_urls = fetch_article_urls(trends_dic, topic)
    article_url_str = "\n- ".join(article_urls)
    article_docs = [extract_article(url) for url in article_urls]
    openai.api_key = os.environ['OPENAI_API_KEY']
    chain = _build_generation_chain()
    # NOTE: was previously bound to `res`, which was then shadowed by the
    # loop variable iterating the parsed tweets.
    chain_res = chain({"input_documents": article_docs})
    summary = [chain_res['translated_summary'] + '\n\nSources:\n' + article_url_str]
    tweets = _parse_tweet_drafts(chain_res['output_text'])
    return summary + tweets


def _build_generation_chain():
    """Build the summarize -> translate -> tweet SequentialChain."""
    OpenAIModel = "gpt-3.5-turbo"
    # Low temperature for the factual summarize/translate steps.
    llm = ChatOpenAI(model=OpenAIModel, temperature=0.1)
    summarize_prompt_template = """Write a concise summary of the following Indonesian articles:
{text}
CONCISE SUMMARY:
"""
    prompt = PromptTemplate.from_template(summarize_prompt_template)
    refine_template = (
        "Your job is to produce a final summary\n"
        "We have provided an existing summary up to a certain point: {existing_answer}\n"
        "We have the opportunity to refine the existing summary"
        "(only if needed) with some more context below.\n"
        "------------\n"
        "{text}\n"
        "------------\n"
        "If the context isn't useful, return the original summary."
    )
    refine_prompt = PromptTemplate.from_template(refine_template)
    summarize_chain = load_summarize_chain(
        llm=llm,
        chain_type="refine",
        question_prompt=prompt,
        refine_prompt=refine_prompt,
        return_intermediate_steps=True,
        input_key="input_documents",
        output_key="summarize_output",
        verbose=False
    )
    translate_prompt_template = """Translate this following text to Indonesian:
{summarize_output}
"""
    translate_prompt = PromptTemplate.from_template(translate_prompt_template)
    translate_chain = LLMChain(
        llm=llm,
        prompt=translate_prompt,
        output_key="translated_summary",
        verbose=True
    )
    # Higher temperature for more varied/creative tweet drafts.
    llm_2 = ChatOpenAI(model=OpenAIModel, temperature=0.8)
    tweet_prompt_template = """Generate a list of three varied versions of Twitter post sequences. Each version has 3 to 10 coherent threads. \
The topic of the post is as follows:
{translated_summary}
You are required to write it in Indonesian. Keep it fun to read by adding some emojis and supporting hashtags (just if you think it's necessary).
Output it as an array with 3 JSON items format with the following keys:
- version: <version 1/2/3>,
- tweet: <the tweet, each thread separated by the number of the sequence and new line char>
"""
    tweet_prompt = PromptTemplate.from_template(tweet_prompt_template)
    tweet_chain = LLMChain(
        llm=llm_2,
        prompt=tweet_prompt,
        output_key="output_text",
        verbose=True
    )
    return SequentialChain(
        chains=[summarize_chain, translate_chain, tweet_chain],
        input_variables=["input_documents"],
        output_variables=["translated_summary", "output_text"],
        verbose=True
    )


def _parse_tweet_drafts(output_text):
    """Parse the model's JSON output into exactly three tweet strings.

    The LLM can emit malformed JSON or a different number of versions than
    requested; previously either case broke the 4-output Gradio binding
    (json.JSONDecodeError, or a wrong-length return list). Pad/truncate to
    three so the three draft boxes always bind.
    """
    try:
        generated_res = json.loads(output_text)
    except json.JSONDecodeError:
        # Best effort: surface the raw model output as the first draft.
        return [output_text, '', '']
    tweets = [item.get('tweet') for item in generated_res]
    return (tweets + ['', '', ''])[:3]
# ============== GRADIO FUNCTIONS ============== | |
# Dropdown placeholder shown before the user has browsed any trends.
options = ['Do the Browse Trend first']

with gr.Blocks() as demo:
    # App header / usage instructions.
    # NOTE(review): the emoji characters below look mojibake'd (e.g. "π°π₯")
    # — likely a prior encoding mix-up; confirm against the deployed app.
    gr.Markdown(
        """
# π°π₯ Trending News Article-based Tweet (π) Generator
Don't know a current trend? Have no resources to do a research? But you wanna gain a traffic to your Twitter a.k.a π? This is a perfect solution for you!
With a single click, you will get the top 10 most-searched topic in Google Search on specific date. Select one of them, we'll fetch some articles related to your selected topic.
Finally, foala! You get three drafts of tweet that you can simply copy-paste to your Twitter/π!
Psst, for now it will take around **~2 minutes** from fetching several articles related to selected topic until we generate the tweet drafts. We'll improve it soon!
"""
    )
    with gr.Row():
        # Left column: date inputs, trend browsing, and the generate action.
        with gr.Column(scale=1):
            with gr.Row():
                year_txt = gr.Textbox(label="year (yyyy)")
                month_txt = gr.Textbox(label="month (mm)")
                date_txt = gr.Textbox(label="date (dd)")
            btn_fetch_trend = gr.Button("1. Browse Trend")
            trend_options = gr.Dropdown(options, label="Top 10 trends")
            # Hidden JSON state carrying the raw trend dicts between steps.
            trend_res = gr.JSON(visible=False)
            generate_btn = gr.Button("2. Generate now!", variant='primary')
        # Right column: generated summary and the three tweet drafts.
        with gr.Column(scale=1):
            trend_summary = gr.Textbox(label='Trend Summary')
            with gr.Tab("Draft 1"):
                ver_1 = gr.Textbox(lines=10, show_copy_button=True)
            with gr.Tab("Draft 2"):
                ver_2 = gr.Textbox(lines=10, show_copy_button=True)
            with gr.Tab("Draft 3"):
                ver_3 = gr.Textbox(lines=10, show_copy_button=True)
    # Step 1: populate the dropdown + hidden state from the chosen date.
    btn_fetch_trend.click(dropdown_trend, inputs=[year_txt, month_txt, date_txt], outputs=[trend_options, trend_res])
    # Step 2: summarize articles for the chosen trend and draft tweets.
    generate_btn.click(generate, inputs=[trend_options, trend_res], outputs=[trend_summary, ver_1, ver_2, ver_3])

demo.launch(debug=True)