Spaces:

indonesian-nlp
/

news-generator

Runtime error

App Files Files Community

cahya committed on Nov 13, 2022

Commit

a0df823

1 Parent(s): 8f794cd

Remove the printout of hf_auth_token and the debug print statements

Browse files

Files changed (1) hide show

app/app.py +3 -16

app/app.py CHANGED Viewed

@@ -16,7 +16,6 @@ mirror_url = "https://news-generator.ai-research.id/"
 if "MIRROR_URL" in os.environ:
     mirror_url = os.environ["MIRROR_URL"]
 hf_auth_token = os.getenv("HF_AUTH_TOKEN", False)
-st.write(f"Using Hugging Face auth token: {hf_auth_token[:10]}...")
 MODELS = {
     "Indonesian Newspaper - Indonesian GPT-2 Medium": {
@@ -80,24 +79,12 @@ def process(text_generator, tokenizer, title: str, keywords: str, text: str,
         min_penalty = 1.05
         max_penalty = 1.5
         repetition_penalty = max(min_penalty + (1.0 - temperature) * (max_penalty - min_penalty), 0.8)
-    # print("title:", title)
-    # print("keywords:", keywords)
     prompt = f"title: {title}\nkeywords: {keywords}\n{text}"
-    # print("prompt: ", prompt)
     generated = torch.tensor(tokenizer.encode(prompt)).unsqueeze(0)
     # device = torch.device("cuda")
     # generated = generated.to(device)
-    print("do_sample:", do_sample)
-    print("penalty_alpha:", penalty_alpha)
-    print("max_length:", max_length)
-    print("top_k:", top_k)
-    print("top_p:", top_p)
-    print("temperature:", temperature)
-    print("max_time:", max_time)
-    print("repetition_penalty:", repetition_penalty)
     text_generator.eval()
     sample_outputs = text_generator.generate(generated,
                                              penalty_alpha=penalty_alpha,
@@ -111,16 +98,17 @@ def process(text_generator, tokenizer, title: str, keywords: str, text: str,
                                              num_return_sequences=1
                                              )
     result = tokenizer.decode(sample_outputs[0], skip_special_tokens=True)
-    # print(f"result: {result}")
     prefix_length = len(title) + len(keywords) + len("title: keywords: ") + 2
     result = result[prefix_length:]
     return result
 st.title("Indonesian GPT-2 Applications")
 prompt_group_name = MODELS[model_type]["group"]
 st.header(prompt_group_name)
-description = f"This is a bilingual (Indonesian and English) abstract generator using Indonesian GPT-2 Medium. We finetuned it with the Indonesian paper abstract dataset."
 st.markdown(description)
 model_name = f"Model name: [{MODELS[model_type]['name']}](https://huggingface.co/{MODELS[model_type]['name']})"
 st.markdown(model_name)
@@ -246,7 +234,6 @@ if prompt_group_name in ["Indonesian Newspaper"]:
             time_end = time.time()
             time_diff = time_end - time_start
             # result = result[0]["generated_text"]
-            result = result[:result.find("title:")]
             st.write(result.replace("\n", "  \n"))
             st.text("Translation")
             translation = translate(result, "en", "id")

 if "MIRROR_URL" in os.environ:
     mirror_url = os.environ["MIRROR_URL"]
 hf_auth_token = os.getenv("HF_AUTH_TOKEN", False)
 MODELS = {
     "Indonesian Newspaper - Indonesian GPT-2 Medium": {
         min_penalty = 1.05
         max_penalty = 1.5
         repetition_penalty = max(min_penalty + (1.0 - temperature) * (max_penalty - min_penalty), 0.8)
     prompt = f"title: {title}\nkeywords: {keywords}\n{text}"
     generated = torch.tensor(tokenizer.encode(prompt)).unsqueeze(0)
     # device = torch.device("cuda")
     # generated = generated.to(device)
     text_generator.eval()
     sample_outputs = text_generator.generate(generated,
                                              penalty_alpha=penalty_alpha,
                                              num_return_sequences=1
                                              )
     result = tokenizer.decode(sample_outputs[0], skip_special_tokens=True)
     prefix_length = len(title) + len(keywords) + len("title: keywords: ") + 2
     result = result[prefix_length:]
+    title_index = result.find("title: ")
+    result = result[:title_index] if title_index > 0 else result
     return result
 st.title("Indonesian GPT-2 Applications")
 prompt_group_name = MODELS[model_type]["group"]
 st.header(prompt_group_name)
+description = f"This is a news generator using Indonesian GPT-2 Medium. We finetuned the pre-trained model with the Indonesian online newspaper dataset."
 st.markdown(description)
 model_name = f"Model name: [{MODELS[model_type]['name']}](https://huggingface.co/{MODELS[model_type]['name']})"
 st.markdown(model_name)
             time_end = time.time()
             time_diff = time_end - time_start
             # result = result[0]["generated_text"]
             st.write(result.replace("\n", "  \n"))
             st.text("Translation")
             translation = translate(result, "en", "id")