Update app.py
app.py
CHANGED
@@ -97,10 +97,6 @@ def get_azure_response(messages, api_key, azure_api_base):
     return response['choices'][0]['message']['content']
 
 def cek_url(url):
-    if not os.path.exists("log_url.txt"):
-        with open("log_url.txt", 'w') as file:
-            pass
-
     with open("log_url.txt", 'r') as file:
         scraped_urls = set(url.strip() for url in file.readlines())
 
@@ -110,13 +106,13 @@ def cek_url(url):
     scraped_urls.add(url)
     return False
 
-def scrap_portal(query):
+def scrap_portal(queri):
     api_key = 'AIzaSyDJUWVZG2oHkHSsYoqdqgUZwQC2Aa2kSok'
     search_engine_id = 'a0dc878459ceb4811'
-    num_pages = 
+    num_pages = 3
     type = random.choice([' articles',' news',' trends',' technologies', ' future'])
     link = []
-    query = 
+    query = queri + type
 
     for page in range(num_pages):
         start_index = page * 10 + 1
@@ -134,7 +130,7 @@ def scrap_portal(query):
     filter_link2 = [url for url in filter_link1 if "help" not in url]
     return filter_link2
 
-def clean_scrap(artikel,link,models,api_key,azure_api_base):
+def clean_scrap(artikel,link,models,api_key,azure_api_base,keyword):
     new_artikel = []
     article = []
     if len(artikel) > 1:
@@ -190,7 +186,7 @@ def clean_scrap(artikel,link,models,api_key,azure_api_base):
     title = content[0].replace('title:', '').replace("Title:", '').strip()
     messages=[
         {"role": "system", "content": "You are a professional translator and rewriter"},
-        {"role": "user", "content": "Please translate and rewrite this sentence into Indonesian language with the following requirements: \n1. The sentence should be concise, compact, and clear. \n2. The sentence length should not exceed 50 characters. \n3. The sentences should be professional, similar to typical article titles and sound more natural for a human to read
+        {"role": "user", "content": f"Please translate and rewrite this sentence into Indonesian language with the following requirements: \n1. The sentence should be concise, compact, and clear. \n2. The sentence length should not exceed 50 characters. \n3. The sentences should be professional, similar to typical article titles and sound more natural for a human to read. \n4. fokus keyword menggunakan keyword {keyword} harus ada di awal judul. \n5. Gaya Penulisan judul artikel seperti gaya forbes. \n6. Menggunakan bahasa indonesia yag mudah dipahami/familiar oleh manusia , :" +title+"\nDo not write any explanation and any pleasantries. Please use the following complete format to display the output: Judul:{hasil rewrite}"}
     ]
     if models == 'openai':
         judul = get_openai_response(messages,api_key)
@@ -208,7 +204,7 @@ def clean_scrap(artikel,link,models,api_key,azure_api_base):
 
     return title, judul, link, contents
 
-def scrap_artikel(source_type,source,models,api_key,azure_api_base):
+def scrap_artikel(source_type,source,models,api_key,azure_api_base,keyword):
     options = webdriver.ChromeOptions()
     options.add_argument('--headless')
     options.add_argument('--no-sandbox')
@@ -242,14 +238,14 @@ def scrap_artikel(source_type,source,models,api_key,azure_api_base):
             artic=paragraph.get_text()
             artikel.append(artic)
 
-
-
-
-
-
-
-
-
+        paragraf = ' '.join(artikel)
+        if len(paragraf)>= 18000:
+            part1, part2, part3, part4 = split_article(paragraf)
+            artikels = [part1, part2, part3, part4]
+        else :
+            artikels = [paragraf]
+        title, judul, url, contents = clean_scrap(artikels,url,models,api_key,azure_api_base,keyword)
+        return title, judul, url, contents
 
     else:
         wd.get(source)
@@ -274,11 +270,11 @@ def scrap_artikel(source_type,source,models,api_key,azure_api_base):
             artikels = [part1, part2, part3, part4]
         else :
             artikels = [paragraf]
-        title, judul, url, contents = clean_scrap(artikels,source,models,api_key,azure_api_base)
+        title, judul, url, contents = clean_scrap(artikels,source,models,api_key,azure_api_base,keyword)
         return title, judul, url, contents
 
 def artikel_processing(source_type,source,backlink,keyword,models,api_key,azure_api_base,replicate_key):
-    title, judul, url, artikel= scrap_artikel(source_type,source, models, api_key,azure_api_base)
+    title, judul, url, artikel= scrap_artikel(source_type,source, models, api_key,azure_api_base,keyword)
     teks_to_tags = artikel[0][:500]
     translated = []
     optimized = []
@@ -643,18 +639,18 @@ def post(endpoint,endpoint_media,username,password,tags,categories,metode):
     return response_post.json()
 
 def view_output():
-    try:
-        with open('judul.txt', 'r') as file:
-            judul = file.read()
-        title = '<h1>'+judul+'</h1>'
-        with open('kontent.txt', 'r') as file:
-            kontent = file.read()
-        time.sleep(5)
-        return title,kontent
-    except:
-        title = 'Gagal Generate Judul'
-        kontent = 'Gagal Generate Konten'
-        return title,kontent
+    # try:
+    with open('judul.txt', 'r') as file:
+        judul = file.read()
+    title = '<h1>'+judul+'</h1>'
+    with open('kontent.txt', 'r') as file:
+        kontent = file.read()
+    time.sleep(5)
+    return title,kontent
+    # except:
+    #     title = 'Gagal Generate Judul'
+    #     kontent = 'Gagal Generate Konten'
+    #     return title,kontent
 
 def save(title,content):
     with open("judul.txt", "w") as file:
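Review note: this commit drops the guard that created log_url.txt before reading it, so cek_url now assumes the log file already exists, and open("log_url.txt", 'r') will raise FileNotFoundError on a fresh deployment. A minimal sketch of one way to keep the read safe without the three-line guard (a hypothetical variant, not part of this commit):

def cek_url_safe(url, log_path="log_url.txt"):
    # Hypothetical variant: mode 'a+' creates the file when it is missing,
    # so no separate os.path.exists() check is needed before reading.
    with open(log_path, 'a+') as file:
        file.seek(0)  # 'a+' opens positioned at end-of-file; rewind to read
        scraped_urls = set(line.strip() for line in file)
    if url in scraped_urls:
        return True   # URL was already scraped
    scraped_urls.add(url)
    return False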
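On the new num_pages = 3 and the existing start_index = page * 10 + 1 arithmetic: the Google Custom Search JSON API returns at most 10 results per request and pages with a 1-based start parameter, so three pages map to start values 1, 11, 21. A sketch of what the elided loop body presumably does, using the public customsearch/v1 endpoint (the request code is not visible in this diff, so treat the parameter wiring as an assumption):

import requests

def fetch_links(query, api_key, search_engine_id, num_pages=3):
    # Assumed equivalent of scrap_portal's elided loop body.
    links = []
    for page in range(num_pages):
        start_index = page * 10 + 1  # 1, 11, 21: the API pages in steps of 10
        resp = requests.get(
            'https://www.googleapis.com/customsearch/v1',
            params={'key': api_key, 'cx': search_engine_id,
                    'q': query, 'start': start_index},
        )
        for item in resp.json().get('items', []):
            links.append(item['link'])
    return links

As committed, the API key and search engine id are hard-coded in app.py; reading them from environment variables would keep them out of the repository history.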
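The commit comments out view_output's try/except rather than deleting it, so a missing judul.txt or kontent.txt now propagates an unhandled exception to the caller instead of returning the 'Gagal Generate' fallback strings. If the fallback is still wanted, a narrower handler avoids the old bare except: (a sketch, not part of this commit):

import time

def view_output():
    # Same flow as the pre-commit version, but catching only the file
    # errors the fallback was presumably meant to cover.
    try:
        with open('judul.txt', 'r') as file:
            judul = file.read()
        title = '<h1>' + judul + '</h1>'
        with open('kontent.txt', 'r') as file:
            kontent = file.read()
        time.sleep(5)
        return title, kontent
    except OSError:
        return 'Gagal Generate Judul', 'Gagal Generate Konten'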
|