Dede16 committed on
Commit
fd48355
·
1 Parent(s): bc413eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -32
app.py CHANGED
@@ -97,10 +97,6 @@ def get_azure_response(messages, api_key, azure_api_base):
97
  return response['choices'][0]['message']['content']
98
 
99
  def cek_url(url):
100
- if not os.path.exists("log_url.txt"):
101
- with open("log_url.txt", 'w') as file:
102
- pass
103
-
104
  with open("log_url.txt", 'r') as file:
105
  scraped_urls = set(url.strip() for url in file.readlines())
106
 
@@ -110,13 +106,13 @@ def cek_url(url):
110
  scraped_urls.add(url)
111
  return False
112
 
113
- def scrap_portal(query):
114
  api_key = 'AIzaSyDJUWVZG2oHkHSsYoqdqgUZwQC2Aa2kSok'
115
  search_engine_id = 'a0dc878459ceb4811'
116
- num_pages = 5
117
  type = random.choice([' articles',' news',' trends',' technologies', ' future'])
118
  link = []
119
- query = query + type
120
 
121
  for page in range(num_pages):
122
  start_index = page * 10 + 1
@@ -134,7 +130,7 @@ def scrap_portal(query):
134
  filter_link2 = [url for url in filter_link1 if "help" not in url]
135
  return filter_link2
136
 
137
- def clean_scrap(artikel,link,models,api_key,azure_api_base):
138
  new_artikel = []
139
  article = []
140
  if len(artikel) > 1:
@@ -190,7 +186,7 @@ def clean_scrap(artikel,link,models,api_key,azure_api_base):
190
  title = content[0].replace('title:', '').replace("Title:", '').strip()
191
  messages=[
192
  {"role": "system", "content": "You are a professional translator and rewriter"},
193
- {"role": "user", "content": "Please translate and rewrite this sentence into Indonesian language with the following requirements: \n1. The sentence should be concise, compact, and clear. \n2. The sentence length should not exceed 50 characters. \n3. The sentences should be professional, similar to typical article titles and sound more natural for a human to read.:" +title+"\nDo not write any explanation and any pleasantries. Please use the following complete format to display the output: Judul:{hasil rewrite}"}
194
  ]
195
  if models == 'openai':
196
  judul = get_openai_response(messages,api_key)
@@ -208,7 +204,7 @@ def clean_scrap(artikel,link,models,api_key,azure_api_base):
208
 
209
  return title, judul, link, contents
210
 
211
- def scrap_artikel(source_type,source,models,api_key,azure_api_base):
212
  options = webdriver.ChromeOptions()
213
  options.add_argument('--headless')
214
  options.add_argument('--no-sandbox')
@@ -242,14 +238,14 @@ def scrap_artikel(source_type,source,models,api_key,azure_api_base):
242
  artic=paragraph.get_text()
243
  artikel.append(artic)
244
 
245
- paragraf = ' '.join(artikel)
246
- if len(paragraf)>= 18000:
247
- part1, part2, part3, part4 = split_article(paragraf)
248
- artikels = [part1, part2, part3, part4]
249
- else :
250
- artikels = [paragraf]
251
- title, judul, url, contents = clean_scrap(artikels,url,models,api_key,azure_api_base)
252
- return title, judul, url, contents
253
 
254
  else:
255
  wd.get(source)
@@ -274,11 +270,11 @@ def scrap_artikel(source_type,source,models,api_key,azure_api_base):
274
  artikels = [part1, part2, part3, part4]
275
  else :
276
  artikels = [paragraf]
277
- title, judul, url, contents = clean_scrap(artikels,source,models,api_key,azure_api_base)
278
  return title, judul, url, contents
279
 
280
  def artikel_processing(source_type,source,backlink,keyword,models,api_key,azure_api_base,replicate_key):
281
- title, judul, url, artikel= scrap_artikel(source_type,source, models, api_key,azure_api_base)
282
  teks_to_tags = artikel[0][:500]
283
  translated = []
284
  optimized = []
@@ -643,18 +639,18 @@ def post(endpoint,endpoint_media,username,password,tags,categories,metode):
643
  return response_post.json()
644
 
645
  def view_output():
646
- try:
647
- with open('judul.txt', 'r') as file:
648
- judul = file.read()
649
- title = '<h1>'+judul+'</h1>'
650
- with open('kontent.txt', 'r') as file:
651
- kontent = file.read()
652
- time.sleep(5)
653
- return title,kontent
654
- except:
655
- title = 'Gagal Generate Judul'
656
- kontent = 'Gagal Generate Konten'
657
- return title,kontent
658
 
659
  def save(title,content):
660
  with open("judul.txt", "w") as file:
 
97
  return response['choices'][0]['message']['content']
98
 
99
  def cek_url(url):
 
 
 
 
100
  with open("log_url.txt", 'r') as file:
101
  scraped_urls = set(url.strip() for url in file.readlines())
102
 
 
106
  scraped_urls.add(url)
107
  return False
108
 
109
+ def scrap_portal(queri):
110
  api_key = 'AIzaSyDJUWVZG2oHkHSsYoqdqgUZwQC2Aa2kSok'
111
  search_engine_id = 'a0dc878459ceb4811'
112
+ num_pages = 3
113
  type = random.choice([' articles',' news',' trends',' technologies', ' future'])
114
  link = []
115
+ query = queri + type
116
 
117
  for page in range(num_pages):
118
  start_index = page * 10 + 1
 
130
  filter_link2 = [url for url in filter_link1 if "help" not in url]
131
  return filter_link2
132
 
133
+ def clean_scrap(artikel,link,models,api_key,azure_api_base,keyword):
134
  new_artikel = []
135
  article = []
136
  if len(artikel) > 1:
 
186
  title = content[0].replace('title:', '').replace("Title:", '').strip()
187
  messages=[
188
  {"role": "system", "content": "You are a professional translator and rewriter"},
189
+ {"role": "user", "content": f"Please translate and rewrite this sentence into Indonesian language with the following requirements: \n1. The sentence should be concise, compact, and clear. \n2. The sentence length should not exceed 50 characters. \n3. The sentences should be professional, similar to typical article titles and sound more natural for a human to read. \n4. fokus keyword menggunakan keyword {keyword} harus ada di awal judul. \n5. Gaya Penulisan judul artikel seperti gaya forbes. \n6. Menggunakan bahasa indonesia yag mudah dipahami/familiar oleh manusia , :" +title+"\nDo not write any explanation and any pleasantries. Please use the following complete format to display the output: Judul:{hasil rewrite}"}
190
  ]
191
  if models == 'openai':
192
  judul = get_openai_response(messages,api_key)
 
204
 
205
  return title, judul, link, contents
206
 
207
+ def scrap_artikel(source_type,source,models,api_key,azure_api_base,keyword):
208
  options = webdriver.ChromeOptions()
209
  options.add_argument('--headless')
210
  options.add_argument('--no-sandbox')
 
238
  artic=paragraph.get_text()
239
  artikel.append(artic)
240
 
241
+ paragraf = ' '.join(artikel)
242
+ if len(paragraf)>= 18000:
243
+ part1, part2, part3, part4 = split_article(paragraf)
244
+ artikels = [part1, part2, part3, part4]
245
+ else :
246
+ artikels = [paragraf]
247
+ title, judul, url, contents = clean_scrap(artikels,url,models,api_key,azure_api_base,keyword)
248
+ return title, judul, url, contents
249
 
250
  else:
251
  wd.get(source)
 
270
  artikels = [part1, part2, part3, part4]
271
  else :
272
  artikels = [paragraf]
273
+ title, judul, url, contents = clean_scrap(artikels,source,models,api_key,azure_api_base,keyword)
274
  return title, judul, url, contents
275
 
276
  def artikel_processing(source_type,source,backlink,keyword,models,api_key,azure_api_base,replicate_key):
277
+ title, judul, url, artikel= scrap_artikel(source_type,source, models, api_key,azure_api_base,keyword)
278
  teks_to_tags = artikel[0][:500]
279
  translated = []
280
  optimized = []
 
639
  return response_post.json()
640
 
641
  def view_output():
642
+ # try:
643
+ with open('judul.txt', 'r') as file:
644
+ judul = file.read()
645
+ title = '<h1>'+judul+'</h1>'
646
+ with open('kontent.txt', 'r') as file:
647
+ kontent = file.read()
648
+ time.sleep(5)
649
+ return title,kontent
650
+ # except:
651
+ # title = 'Gagal Generate Judul'
652
+ # kontent = 'Gagal Generate Konten'
653
+ # return title,kontent
654
 
655
  def save(title,content):
656
  with open("judul.txt", "w") as file: