Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -14,57 +14,153 @@ import base64
|
|
14 |
import random
|
15 |
from PIL import Image
|
16 |
import gradio as gr
|
|
|
17 |
|
18 |
-
def
|
19 |
-
|
20 |
-
|
|
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
|
28 |
-
options.add_argument(f"user-agent={user_agent}")
|
29 |
|
30 |
-
|
31 |
-
wd.get(url+"search/?q="+keyword)
|
32 |
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
-
|
51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
-
|
54 |
-
|
55 |
-
|
|
|
56 |
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
|
66 |
-
return portal
|
67 |
-
|
68 |
with gr.Blocks(theme = "soft", title="Article Writer") as article_generator:
|
69 |
gr.Markdown(
|
70 |
"""
|
@@ -72,11 +168,14 @@ with gr.Blocks(theme = "soft", title="Article Writer") as article_generator:
|
|
72 |
Buat artikel yang anda inginkan dengan mudah, cukup menuliskan keyword atau topik artikel yang ingin anda buat dan dalam sekejap artikel tersebut menjadi milik anda.
|
73 |
""")
|
74 |
with gr.Row():
|
75 |
-
inputs = gr.Textbox(placeholder="
|
76 |
with gr.Row():
|
77 |
-
button_scrap = gr.Button("Scrap")
|
78 |
with gr.Row():
|
79 |
-
|
80 |
-
|
|
|
|
|
|
|
81 |
if __name__ == "__main__":
|
82 |
article_generator.launch(share=False)
|
|
|
14 |
import random
|
15 |
from PIL import Image
|
16 |
import gradio as gr
|
17 |
+
# SECURITY: an OpenAI secret key used to be hard-coded on this line and was
# committed to the repository. Treat that key as compromised and revoke it.
# The key is now read from the environment (on Hugging Face Spaces, define an
# ``OPENAI_API_KEY`` secret in the Space settings).
import os

openai.api_key = os.environ.get("OPENAI_API_KEY")
|
18 |
|
19 |
+
def split_article(article_text, parts=4):
    """Split *article_text* into ``parts`` consecutive chunks of whole words.

    The text is tokenised with ``str.split()`` (any whitespace run is a
    separator), so the original line breaks and spacing are not preserved:
    every chunk is re-joined with single spaces.

    Args:
        article_text: The text to split.
        parts: Number of chunks to produce. Defaults to 4, which yields the
            same quarter boundaries as ``total // 4``, ``total // 2`` and
            ``(3 * total) // 4``.

    Returns:
        A tuple of ``parts`` strings. When the word count is not a multiple
        of ``parts`` the later chunks absorb the remainder; chunks may be
        empty strings when there are fewer words than ``parts``.

    Raises:
        ValueError: If ``parts`` is smaller than 1.
    """
    if parts < 1:
        raise ValueError("parts must be a positive integer")

    words = article_text.split()
    total_words = len(words)

    # Boundary i sits at floor(i * total / parts); the first is 0 and the
    # last is total_words, so the slices below cover every word exactly once.
    bounds = [(i * total_words) // parts for i in range(parts + 1)]

    return tuple(
        ' '.join(words[start:stop])
        for start, stop in zip(bounds, bounds[1:])
    )
|
|
|
30 |
|
31 |
+
# finish_reason values that are accepted as a usable completion; any other
# value causes the article to be rejected.
_USABLE_FINISH_REASONS = ('length', 'stop')

# Prompt fragments. These are sent to the model verbatim, so they must not be
# reworded or re-spaced casually.
_EDITOR_ROLE = "You are a very professional article editor."
_TITLE_EDITOR_ROLE = "You are a very professional article editor and capable of generating compelling and professional article titles."
_TRANSLATOR_ROLE = "You are a professional translator and rewriter"
_CLEAN_INTRO = "I have a raw article that contains a lot of unnecessary data such as ads, website information, and article publishers, as well as links to other pages, and so on. Please clean up the article I provided so that only the article's content remains."
_SUMMARIZE_EXTRA = " \nThen, you should also summarize the article so that it does not exceed 5000 characters"
_PARAPHRASE_INTRO = "Paraphrase the above article to make it a well-written and easily understandable piece for humans, following the conventions of renowned articles. \nThen, You Must Generate a title that is appropriate for the article I provided. The title should be professional, similar to typical article titles and sound more natural for a human to read"
_TRANSLATE_INTRO = "Please translate and rewrite this sentence into Indonesian language with the following requirements: \n1. The sentence should be concise, compact, and clear. \n2. The sentence length should not exceed 50 characters. \n3. The sentences should be professional, similar to typical article titles and sound more natural for a human to read.:"
_OUTPUT_RULE = "\nDo not write any explanation and any pleasantries. Please use the following complete format to display the output: "


def _first_choice(system_prompt, user_prompt, temperature):
    """Send one system+user exchange to gpt-3.5-turbo and return its first choice."""
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=temperature,
    )
    return response['choices'][0]


def _usable_text(choice):
    """Return the choice's message text, or None if its finish_reason is not accepted."""
    if choice['finish_reason'] in _USABLE_FINISH_REASONS:
        return choice['message']['content']
    return None


def _record_rejected_url(url):
    """Append *url* as one line to the file named by the module-level ``file_path``."""
    # NOTE(review): ``file_path`` is not defined in the visible part of this
    # file; it must exist at module level for this to work.
    with open(file_path, 'a') as file:
        file.write(url + '\n')


def clean_scrap(artikel, url):
    """Clean, paraphrase and title a scraped article using the OpenAI chat API.

    Steps:
      1. Each chunk in ``artikel`` is cleaned of ads and site boilerplate.
         When there is more than one chunk, each is also summarised.
      2. The cleaned chunks are joined with spaces and paraphrased; the model
         is asked to answer as ``title:{title}, article: {...}``.
      3. The first line of that answer is taken as the English title, which
         is then translated/rewritten into an Indonesian title.

    The article is rejected (its URL is appended to ``file_path`` and four
    ``None`` values are returned) when ``artikel`` is empty, when a cleaning
    or paraphrasing completion has a finish_reason other than 'length' or
    'stop', or when the paraphrased body is 2000 characters or shorter.

    Args:
        artikel: List of raw text chunks scraped from one page.
        url: Source URL; returned unchanged on success and logged on rejection.

    Returns:
        ``(title, judul, url, content)`` on success, where ``content`` is the
        paraphrase answer split into lines (title line included), or
        ``(None, None, None, None)`` on rejection.
    """
    rejected = (None, None, None, None)

    # Nothing to clean: don't spend API calls paraphrasing an empty article.
    if not artikel:
        _record_rejected_url(url)
        return rejected

    # Multi-chunk input is the long-article case, so each chunk is summarised
    # as well as cleaned.
    summarize = len(artikel) > 1
    cleaned_chunks = []
    for chunk in artikel:
        if summarize:
            prompt = (_CLEAN_INTRO + _SUMMARIZE_EXTRA + chunk + _OUTPUT_RULE
                      + "{the cleaned and summarized article's content}")
        else:
            prompt = (_CLEAN_INTRO + chunk + _OUTPUT_RULE
                      + "{the cleaned article's content}")
        cleaned = _usable_text(_first_choice(_EDITOR_ROLE, prompt, 0.1))
        if cleaned is None:
            _record_rejected_url(url)
            return rejected
        cleaned_chunks.append(cleaned)

    merged = ' '.join(cleaned_chunks)
    paraphrase_prompt = (_PARAPHRASE_INTRO + merged + _OUTPUT_RULE
                         + "title:{title}, article: {new paraphrased article}")
    rewritten = _usable_text(
        _first_choice(_TITLE_EDITOR_ROLE, paraphrase_prompt, 0.1)
    )
    if rewritten is None:
        _record_rejected_url(url)
        return rejected

    # The first line is expected to carry "title:..."; the rest is the body.
    content = rewritten.split("\n")
    title = content[0].replace('title:', '').strip()

    # Check the body length before translating the title, so a rejected
    # article does not cost an extra API request.
    body = ' '.join(content[1:]).replace("article:", '').replace("Article:", '').strip()
    if len(body) <= 2000:
        _record_rejected_url(url)
        return rejected

    translate_prompt = (_TRANSLATE_INTRO + title + _OUTPUT_RULE
                        + "Judul:{hasil rewrite}")
    # The translated title is used whatever finish_reason the model reports.
    judul = _first_choice(_TRANSLATOR_ROLE, translate_prompt, 0)['message']['content']
    judul = judul.replace("Judul:", '').strip()
    judul = judul.replace("Title:", '').strip()

    # NOTE(review): ``content`` is the raw list of lines including the title
    # line. The cleaned ``body`` string looks like the intended value, but the
    # existing return shape is kept for callers - confirm before changing.
    return title, judul, url, content
|
113 |
|
114 |
+
def _fetch_body_html(url):
    """Load *url* in headless Chrome and return the inner HTML of its <body>."""
    options = webdriver.ChromeOptions()
    for flag in ('--headless', '--no-sandbox', '--disable-dev-shm-usage'):
        options.add_argument(flag)

    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
    options.add_argument(f"user-agent={user_agent}")

    wd = webdriver.Chrome(options=options)
    try:
        wd.get(url)
        # Send Ctrl+End to the page, then wait a second before reading it
        # (presumably to let lazily loaded content appear - confirm).
        wd.find_element(By.CSS_SELECTOR, 'body').send_keys(Keys.CONTROL, Keys.END)
        time.sleep(1)
        return wd.find_element(By.TAG_NAME, 'body').get_attribute('innerHTML')
    finally:
        # Always release the browser, even when navigation or lookup fails.
        wd.quit()


def scrap_artikel(alamat):
    """Scrape one article URL and return its processed title and content.

    Only the first URL for which ``cek_url(url)`` is falsy is processed; the
    function returns as soon as that URL has been handled. The text of all
    ``<p>`` elements is collected, split into four parts when it is 18000
    characters or longer, and passed to ``clean_scrap`` if the total length is
    strictly between 1200 and 28000 characters. Otherwise the URL is appended
    to ``file_path`` and the article is rejected.

    Args:
        alamat: A URL string (several URLs may be separated by whitespace) or
            an iterable of URL strings. ``None`` or an empty value is treated
            as no URLs.

    Returns:
        ``(title, judul, url, content)`` as produced by ``clean_scrap``, or
        ``(None, None, None, None)`` when the article is rejected or there is
        no URL to process.
    """
    # A Gradio Textbox delivers a plain string; iterating it directly would
    # walk over single characters instead of URLs.
    if isinstance(alamat, str):
        links = alamat.split()
    else:
        links = list(alamat or [])

    for url in links:
        if cek_url(url):
            continue

        soup_html = BeautifulSoup(_fetch_body_html(url), "html.parser")
        paragraf = ' '.join(p.get_text() for p in soup_html.find_all('p'))

        # Long pages are cut into quarters so each request stays small.
        if len(paragraf) >= 18000:
            artikels = list(split_article(paragraf))
        else:
            artikels = [paragraf]

        len_artikel = len("".join(artikels))
        if 1200 < len_artikel < 28000:
            # Unpack in the order clean_scrap returns its values.
            title, judul, url, content = clean_scrap(artikels, url)
            return title, judul, url, content

        # NOTE(review): ``file_path`` is not defined in the visible part of
        # this file; it must exist at module level for this to work.
        with open(file_path, 'a') as file:
            file.write(url + '\n')
        return None, None, None, None

    # No URL was given, or every URL was skipped: still return four values,
    # one per output component.
    return None, None, None, None
|
163 |
|
|
|
|
|
164 |
with gr.Blocks(theme = "soft", title="Article Writer") as article_generator:
|
165 |
gr.Markdown(
|
166 |
"""
|
|
|
168 |
Buat artikel yang anda inginkan dengan mudah, cukup menuliskan keyword atau topik artikel yang ingin anda buat dan dalam sekejap artikel tersebut menjadi milik anda.
|
169 |
""")
|
170 |
with gr.Row():
|
171 |
+
inputs = gr.Textbox(placeholder="Link Article", show_label=False)
|
172 |
with gr.Row():
|
173 |
+
button_scrap = gr.Button("Scrap Article")
|
174 |
with gr.Row():
|
175 |
+
title = gr.Textbox("", label="Title")
|
176 |
+
judul = gr.Textbox("", label="Judul")
|
177 |
+
url = gr.Textbox("", label="URL")
|
178 |
+
content = gr.Textbox("", label="Content")
|
179 |
+
button_scrap.click(fn=scrap_artikel, inputs=inputs, outputs=[title,judul,url,content])
|
180 |
if __name__ == "__main__":
|
181 |
article_generator.launch(share=False)
|