Update app.py
app.py
CHANGED
@@ -97,10 +97,6 @@ def get_azure_response(messages, api_key, azure_api_base):
     return response['choices'][0]['message']['content']
 
 def cek_url(url):
-    if not os.path.exists("log_url.txt"):
-        with open("log_url.txt", 'w') as file:
-            pass
-
     with open("log_url.txt", 'r') as file:
         scraped_urls = set(url.strip() for url in file.readlines())
 
@@ -110,13 +106,13 @@ def cek_url(url):
     scraped_urls.add(url)
     return False
 
-def scrap_portal(query):
+def scrap_portal(queri):
     api_key = 'AIzaSyDJUWVZG2oHkHSsYoqdqgUZwQC2Aa2kSok'
     search_engine_id = 'a0dc878459ceb4811'
-    num_pages = 
+    num_pages = 3
     type = random.choice([' articles',' news',' trends',' technologies', ' future'])
     link = []
-    query = 
+    query = queri + type
 
     for page in range(num_pages):
         start_index = page * 10 + 1
@@ -134,7 +130,7 @@ def scrap_portal(query):
     filter_link2 = [url for url in filter_link1 if "help" not in url]
     return filter_link2
 
-def clean_scrap(artikel,link,models,api_key,azure_api_base):
+def clean_scrap(artikel,link,models,api_key,azure_api_base,keyword):
     new_artikel = []
     article = []
     if len(artikel) > 1:
@@ -190,7 +186,7 @@ def clean_scrap(artikel,link,models,api_key,azure_api_base):
     title = content[0].replace('title:', '').replace("Title:", '').strip()
     messages=[
         {"role": "system", "content": "You are a professional translator and rewriter"},
-        {"role": "user", "content": "Please translate and rewrite this sentence into Indonesian language with the following requirements: \n1. The sentence should be concise, compact, and clear. \n2. The sentence length should not exceed 50 characters. \n3. The sentences should be professional, similar to typical article titles and sound more natural for a human to read
+        {"role": "user", "content": f"Please translate and rewrite this sentence into Indonesian language with the following requirements: \n1. The sentence should be concise, compact, and clear. \n2. The sentence length should not exceed 50 characters. \n3. The sentences should be professional, similar to typical article titles and sound more natural for a human to read. \n4. fokus keyword menggunakan keyword {keyword} harus ada di awal judul. \n5. Gaya Penulisan judul artikel seperti gaya forbes. \n6. Menggunakan bahasa indonesia yag mudah dipahami/familiar oleh manusia , :" +title+"\nDo not write any explanation and any pleasantries. Please use the following complete format to display the output: Judul:{hasil rewrite}"}
     ]
     if models == 'openai':
         judul = get_openai_response(messages,api_key)
@@ -208,7 +204,7 @@ def clean_scrap(artikel,link,models,api_key,azure_api_base):
 
     return title, judul, link, contents
 
-def scrap_artikel(source_type,source,models,api_key,azure_api_base):
+def scrap_artikel(source_type,source,models,api_key,azure_api_base,keyword):
     options = webdriver.ChromeOptions()
     options.add_argument('--headless')
     options.add_argument('--no-sandbox')
@@ -242,14 +238,14 @@ def scrap_artikel(source_type,source,models,api_key,azure_api_base):
             artic=paragraph.get_text()
             artikel.append(artic)
 
-
-
-
-
-
-
-
-
+        paragraf = ' '.join(artikel)
+        if len(paragraf)>= 18000:
+            part1, part2, part3, part4 = split_article(paragraf)
+            artikels = [part1, part2, part3, part4]
+        else :
+            artikels = [paragraf]
+        title, judul, url, contents = clean_scrap(artikels,url,models,api_key,azure_api_base,keyword)
+        return title, judul, url, contents
 
     else:
         wd.get(source)
@@ -274,11 +270,11 @@ def scrap_artikel(source_type,source,models,api_key,azure_api_base):
             artikels = [part1, part2, part3, part4]
         else :
             artikels = [paragraf]
-        title, judul, url, contents = clean_scrap(artikels,source,models,api_key,azure_api_base)
+        title, judul, url, contents = clean_scrap(artikels,source,models,api_key,azure_api_base,keyword)
         return title, judul, url, contents
 
 def artikel_processing(source_type,source,backlink,keyword,models,api_key,azure_api_base,replicate_key):
-    title, judul, url, artikel= scrap_artikel(source_type,source, models, api_key,azure_api_base)
+    title, judul, url, artikel= scrap_artikel(source_type,source, models, api_key,azure_api_base,keyword)
     teks_to_tags = artikel[0][:500]
     translated = []
     optimized = []
@@ -643,18 +639,18 @@ def post(endpoint,endpoint_media,username,password,tags,categories,metode):
     return response_post.json()
 
 def view_output():
-    try:
-        with open('judul.txt', 'r') as file:
-            judul = file.read()
-        title = '<h1>'+judul+'</h1>'
-        with open('kontent.txt', 'r') as file:
-            kontent = file.read()
-        time.sleep(5)
-        return title,kontent
-    except:
-        title = 'Gagal Generate Judul'
-        kontent = 'Gagal Generate Konten'
-        return title,kontent
+    # try:
+    with open('judul.txt', 'r') as file:
+        judul = file.read()
+    title = '<h1>'+judul+'</h1>'
+    with open('kontent.txt', 'r') as file:
+        kontent = file.read()
+    time.sleep(5)
+    return title,kontent
+    # except:
+    #     title = 'Gagal Generate Judul'
+    #     kontent = 'Gagal Generate Konten'
+    #     return title,kontent
 
 def save(title,content):
     with open("judul.txt", "w") as file:
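Review note: this commit drops the guard that created log_url.txt before reading it, so cek_url now assumes the log file already exists, and open("log_url.txt", 'r') will raise FileNotFoundError on a fresh deployment. A minimal sketch of one way to keep the read safe without the three-line guard (a hypothetical variant, not part of this commit):

def cek_url_safe(url, log_path="log_url.txt"):
    # Hypothetical variant: mode 'a+' creates the file when it is missing,
    # so no separate os.path.exists() check is needed before reading.
    with open(log_path, 'a+') as file:
        file.seek(0)  # 'a+' opens positioned at end-of-file; rewind to read
        scraped_urls = set(line.strip() for line in file)
    if url in scraped_urls:
        return True   # URL was already scraped
    scraped_urls.add(url)
    return False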
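On the new num_pages = 3 and the existing start_index = page * 10 + 1 arithmetic: the Google Custom Search JSON API returns at most 10 results per request and pages with a 1-based start parameter, so three pages map to start values 1, 11, 21. A sketch of what the elided loop body presumably does, using the public customsearch/v1 endpoint (the request code is not visible in this diff, so treat the parameter wiring as an assumption):

import requests

def fetch_links(query, api_key, search_engine_id, num_pages=3):
    # Assumed equivalent of scrap_portal's elided loop body.
    links = []
    for page in range(num_pages):
        start_index = page * 10 + 1  # 1, 11, 21: the API pages in steps of 10
        resp = requests.get(
            'https://www.googleapis.com/customsearch/v1',
            params={'key': api_key, 'cx': search_engine_id,
                    'q': query, 'start': start_index},
        )
        for item in resp.json().get('items', []):
            links.append(item['link'])
    return links

As committed, the API key and search engine id are hard-coded in app.py; reading them from environment variables would keep them out of the repository history.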
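The commit comments out view_output's try/except rather than deleting it, so a missing judul.txt or kontent.txt now propagates an unhandled exception to the caller instead of returning the 'Gagal Generate' fallback strings. If the fallback is still wanted, a narrower handler avoids the old bare except: (a sketch, not part of this commit):

import time

def view_output():
    # Same flow as the pre-commit version, but catching only the file
    # errors the fallback was presumably meant to cover.
    try:
        with open('judul.txt', 'r') as file:
            judul = file.read()
        title = '<h1>' + judul + '</h1>'
        with open('kontent.txt', 'r') as file:
            kontent = file.read()
        time.sleep(5)
        return title, kontent
    except OSError:
        return 'Gagal Generate Judul', 'Gagal Generate Konten'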
|