Spaces:

ravindravala
/

humaniser

Running

File size: 2,419 Bytes

96b5d9c
 
 
 
 
 
 
 
 
 
 
 
 
 
3aeccd5
 
 
 
96b5d9c
 
 
 
 
 
 
 
 
 
 
3aeccd5
 
 
 
 
 
 
 
 
 
 
 
96b5d9c
 
 
3aeccd5
25bb492
96b5d9c
 
 
 
 
 
3aeccd5
96b5d9c
 
 
 
3aeccd5
96b5d9c
 
 
 
 
 
 
 
 
 
3aeccd5
 
 
 
96b5d9c
 
 
 
3aeccd5
96b5d9c
 
 
879f4aa
96b5d9c

import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import random
import re

# Fix torch's RNG seed for reproducibility
torch.manual_seed(42)

# Use the GPU when torch reports one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Checkpoint shared by the tokenizer and the seq2seq model
MODEL_NAME = "humarin/chatgpt_paraphraser_on_T5_base"

# Load the tokenizer and the model, then move the model to the chosen device
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
model = model.to(device)


# Function to paraphrase text
def humanize_text(text, temperature=0.7, max_length=512):
    """Paraphrase ``text`` with the module-level tokenizer and model.

    The input is prefixed with ``"paraphrase: "``, tokenized (truncated to
    ``max_length`` tokens) and passed to ``model.generate`` with sampling
    disabled.

    Args:
        text: The passage to rewrite.
        temperature: Accepted for backward compatibility only. It is not
            forwarded to ``generate`` because decoding runs with
            ``do_sample=False``.
        max_length: Token limit applied both when encoding the prompt and
            when generating the output.

    Returns:
        The decoded paraphrase as a string, with special tokens stripped.
    """
    encoded = tokenizer(
        f"paraphrase: {text}",
        return_tensors="pt",
        padding=True,
        max_length=max_length,
        truncation=True,
    ).to(device)

    # Pass the attention mask together with the ids so the model is told
    # explicitly which positions are real tokens.
    outputs = model.generate(
        encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_length=max_length,
        do_sample=False,
        repetition_penalty=2.0,
        no_repeat_ngram_size=2,
    )

    paraphrased_texts = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    # One prompt and no num_return_sequences means one decoded sequence, so
    # there is nothing to pick between.
    return paraphrased_texts[0]


# Sentence boundary: a run of whitespace directly after '.', '!' or '?'
_SENTENCE_BOUNDARY = re.compile(r"(?<=[.!?])\s+")


# Function to split input into sentences
def split_into_sentences(text):
    """Split ``text`` into sentences at whitespace following ``.``, ``!`` or ``?``.

    The terminating punctuation stays attached to its sentence. Fragments
    that are empty or whitespace-only (for example the one produced by
    trailing whitespace after the final period) are dropped, so callers
    never receive a blank "sentence".

    Args:
        text: The string to split.

    Returns:
        A list of non-blank sentence strings; empty when ``text`` contains
        no non-whitespace characters.
    """
    return [piece for piece in _SENTENCE_BOUNDARY.split(text) if piece.strip()]


# Function to process multi-line text
def process_text(input_text):
    """Paraphrase ``input_text`` sentence by sentence, keeping its line layout.

    The text is split on ``"\\n"``. Blank and whitespace-only lines are kept
    verbatim; every other line is split into sentences, each sentence is
    passed to ``humanize_text``, and the results are re-joined with single
    spaces.

    Args:
        input_text: The text to rewrite; may span multiple lines.

    Returns:
        The rewritten text with the same number of lines as the input, or
        an empty string when ``input_text`` is empty or ``None``.
    """
    if not input_text:
        return ""

    processed_lines = []
    for line in input_text.split("\n"):
        if not line.strip():
            # Nothing to paraphrase; keep the line as-is to preserve layout.
            processed_lines.append(line)
            continue

        # Use humanize_text's default token budget. A sentence's character
        # count is not a token count: for short sentences it can truncate
        # the "paraphrase: ..." prompt and cap the generated output.
        rewritten = [
            humanize_text(sentence)
            for sentence in split_into_sentences(line)
            if sentence.strip()  # skip blank fragments from trailing spaces
        ]
        processed_lines.append(" ".join(rewritten))

    return "\n".join(processed_lines)


# Input component: a five-line textbox limited to 2000 characters
text_input = gr.Textbox(
    lines=5,
    placeholder="Enter text to humanize...",
    max_length=2000,
)

# Gradio interface wiring the textbox to process_text with plain-text output
iface = gr.Interface(
    fn=process_text,
    inputs=text_input,
    outputs="text",
    title="AI Text Humanizer",
    description="Enter text, and the AI will rewrite it in a more human-like way.",
)

# Start the Gradio app only when this file is executed as a script
if __name__ == "__main__":
    iface.launch()