Update app.py
Browse files
app.py
CHANGED
@@ -1,24 +1,231 @@
|
|
1 |
import gradio as gr
|
2 |
-
from
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
|
3 |
+
import torch
|
4 |
+
from translatepy import Translator
|
5 |
+
import logging
|
6 |
+
from typing import Dict, Optional
|
7 |
+
import random
|
8 |
+
import time
|
9 |
+
from concurrent.futures import ThreadPoolExecutor
|
10 |
+
import nltk
|
11 |
+
from nltk.tokenize import sent_tokenize
|
12 |
+
# Fetch the Punkt sentence-tokenizer data that sent_tokenize() relies on.
# Recent NLTK releases load the "punkt_tab" resource instead of the pickled
# "punkt" models, so both are requested; already-present packages are not
# downloaded again and a failed download only reports an error.
nltk.download('punkt')
nltk.download('punkt_tab')

# Configure logging: INFO and above, timestamped, on the root logger.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
# Module-level logger used by the rest of this file.
logger = logging.getLogger(__name__)
|
20 |
+
|
21 |
+
class TextHumanizer:
    """Detect AI-sounding text and rewrite it in a chosen tone.

    ``__init__`` creates the three collaborators the methods use: a
    text-classification pipeline (``detector``), a text2text-generation
    pipeline (``humanizer``) and a translatepy ``Translator``
    (``translator``).  ``tone_prompts`` maps each supported tone name to a
    list of prompt templates containing a ``{text}`` placeholder.
    """

    # Classifier labels (compared case-insensitively) that mean "machine
    # generated".  "ARTIFICIAL" is the label this code originally looked for;
    # "Fake" is the label shown on the roberta-base-openai-detector model
    # card, which the original comparison never matched.
    _AI_LABELS = frozenset({"artificial", "fake"})

    # Conversational fillers that may be placed in front of a sentence.
    _FILLERS = ("well", "you know", "actually", "basically", "I mean")

    def __init__(self):
        # Use the first CUDA device when one is available, otherwise the CPU.
        device = 0 if torch.cuda.is_available() else -1

        # Initialize AI detection model
        self.detector = pipeline(
            "text-classification",
            model="roberta-base-openai-detector",
            device=device
        )

        # Initialize text generation model
        # NOTE(review): facebook/bart-large-cnn looks like a summarization
        # checkpoint; confirm it actually follows the rewrite instructions in
        # ``tone_prompts`` rather than summarizing the prompt.
        self.humanizer = pipeline(
            "text2text-generation",
            model="facebook/bart-large-cnn",
            device=device
        )

        # Initialize translator
        self.translator = Translator()

        # Enhanced tone prompts with more natural variations
        self.tone_prompts = {
            "Casual": [
                "Rewrite this in a laid-back, conversational style with some personality: {text}",
                "Make this sound more casual and friendly, like you're chatting with a buddy: {text}",
                "Rephrase this in a relaxed way, adding some humor where it fits: {text}"
            ],
            "Business": [
                "Transform this into clear, professional business language: {text}",
                "Rewrite this for a corporate audience, maintaining professionalism: {text}",
                "Convert this into business-appropriate language with precise terminology: {text}"
            ],
            "Academic": [
                "Rephrase this using scholarly language and academic conventions: {text}",
                "Rewrite this for an academic paper, with proper technical terminology: {text}",
                "Transform this into academic prose with appropriate formal language: {text}"
            ],
            "Creative": [
                "Rewrite this with vivid imagery and engaging metaphors: {text}",
                "Transform this using creative language and sensory details: {text}",
                "Rephrase this with artistic flair and descriptive language: {text}"
            ],
            "Email": [
                "Convert this into a professional email while maintaining warmth: {text}",
                "Rewrite this as a clear, concise email with appropriate tone: {text}",
                "Transform this into email format with professional courtesy: {text}"
            ]
        }

    @staticmethod
    def _ai_probability(result: Dict[str, object]) -> float:
        """Return the score of one classifier result if it flags AI text, else 0.0.

        ``result`` is a single ``{'label': ..., 'score': ...}`` mapping as
        produced by the detector pipeline.
        """
        label = str(result.get('label', '')).lower()
        if label in TextHumanizer._AI_LABELS:
            return float(result['score'])
        return 0.0

    @staticmethod
    def _prepend_filler(sentence: str, filler: str) -> str:
        """Return ``sentence`` introduced by ``filler`` and a comma.

        The filler is capitalized because it now starts the sentence.  Only
        the first character of the sentence is lowercased (never the whole
        sentence), and it is left alone when the first word is the pronoun
        "I" or begins with two uppercase letters (likely an acronym).
        Capitalized proper nouns cannot be recognized here and are still
        lowercased.
        """
        words = sentence.split(maxsplit=1)
        first_word = words[0] if words else ""
        keep_case = (
            first_word == "I"
            or first_word.startswith("I'")
            or (len(first_word) > 1 and first_word[:2].isupper())
        )
        lead = sentence if keep_case else sentence[:1].lower() + sentence[1:]
        return f"{filler[:1].upper()}{filler[1:]}, {lead}"

    def detect_ai_text(self, text: str) -> float:
        """
        Detect if text is AI-generated and return confidence score

        The text is split into sentences, grouped into chunks of five, and
        each chunk is classified separately.  The result is the highest
        "AI-generated" score over all chunks, or 0.0 when no chunk is flagged,
        when the text contains no sentences, or when detection fails (the
        failure is logged).
        """
        try:
            # Split long text into sentences and analyze each chunk
            sentences = sent_tokenize(text)
            chunks = [' '.join(sentences[i:i + 5]) for i in range(0, len(sentences), 5)]

            # truncation=True keeps an over-long chunk within the model's
            # maximum input length instead of failing inside the model.
            scores = [
                self._ai_probability(self.detector(chunk, truncation=True)[0])
                for chunk in chunks
            ]
            return max(scores, default=0.0)

        except Exception as e:
            logger.error(f"Error in AI detection: {str(e)}")
            return 0.0

    def add_human_variations(self, text: str) -> str:
        """
        Add human-like variations to text

        Each sentence independently has a 30% chance of being prefixed with a
        random filler; afterwards " can not " and " do not " are contracted.
        The outcome is random unless the ``random`` module is seeded.
        """
        varied = []
        for sentence in sent_tokenize(text):
            if random.random() < 0.3:  # 30% chance to add filler
                sentence = self._prepend_filler(sentence, random.choice(self._FILLERS))
            varied.append(sentence)

        # Add minor grammatical variations
        text = ' '.join(varied)
        text = text.replace(" can not ", " can't ")
        text = text.replace(" do not ", " don't ")

        return text

    def _rewrite_with_retries(
        self,
        text: str,
        tone: str,
        ai_score: float,
        max_retries: int
    ) -> str:
        """Generate a rewrite of ``text`` and return the least AI-like candidate.

        Up to ``max_retries`` attempts are made (at least one).  An attempt is
        accepted as soon as its detection score is below ``ai_score``.  If no
        attempt gets below it, the candidate with the lowest score is
        returned.  Raises ``KeyError`` for an unknown ``tone`` and re-raises
        the generation error when the final attempt fails and no earlier
        attempt produced a candidate.
        """
        prompts = self.tone_prompts[tone]
        max_length = min(len(text) * 2, 1000)
        attempts = max(1, max_retries)

        best_output = None
        best_score = None
        for attempt in range(attempts):
            is_last = attempt == attempts - 1
            try:
                # Select a fresh random prompt variation for every attempt so
                # a retry does not simply repeat the previous request.
                prompt = random.choice(prompts).format(text=text)

                # do_sample=True is what makes temperature/top_p take effect;
                # without it generation is deterministic and retrying is
                # pointless.  truncation=True guards against prompts longer
                # than the model's maximum input length.
                candidate = self.humanizer(
                    prompt,
                    max_length=max_length,
                    num_beams=4,
                    do_sample=True,
                    temperature=0.8,
                    top_p=0.9,
                    truncation=True
                )[0]['generated_text']

                # Add human variations
                candidate = self.add_human_variations(candidate)

                # Verify the output is more human-like
                new_ai_score = self.detect_ai_text(candidate)
            except Exception as e:
                if not is_last:
                    logger.warning(f"Attempt {attempt + 1} failed: {str(e)}")
                    time.sleep(1)
                    continue
                if best_output is None:
                    raise
                # An earlier attempt succeeded; prefer it over failing.
                logger.warning(f"Attempt {attempt + 1} failed: {str(e)}")
                break

            if best_score is None or new_ai_score < best_score:
                best_output, best_score = candidate, new_ai_score

            if new_ai_score < ai_score:
                break
            if not is_last:
                logger.warning(f"Attempt {attempt + 1}: Output still seems AI-like, retrying...")
                time.sleep(1)

        return best_output

    def humanize_text(
        self,
        text: str,
        tone: str,
        translate_to: Optional[str] = None,
        max_retries: int = 3
    ) -> str:
        """
        Main function to humanize text with error handling and retries

        Args:
            text: The text to process.  Empty or whitespace-only input yields
                an empty string without touching any model.
            tone: A key of ``self.tone_prompts``; only used when the text is
                scored as AI-generated.
            translate_to: ``None``/``"None"`` for no translation, otherwise a
                string whose first space-separated token is the destination
                language code (for example ``"es (Spanish)"``).
            max_retries: Maximum number of generation attempts.

        Returns:
            The processed text, or ``"Error processing text: ..."`` when any
            step raises; errors are logged, never propagated.
        """
        if not text or not text.strip():
            return ""

        try:
            # Check if text is likely AI-generated
            ai_score = self.detect_ai_text(text)
            logger.info(f"AI detection score: {ai_score}")

            if ai_score < 0.7:  # If text already seems human-like
                logger.info("Text appears human-written, making minor adjustments")
                output = self.add_human_variations(text)
            else:
                output = self._rewrite_with_retries(text, tone, ai_score, max_retries)

            # Handle translation if requested
            if translate_to and translate_to != "None":
                lang_code = translate_to.split(" ")[0]
                output = self.translator.translate(output, destination_language=lang_code).result

            return output

        except Exception as e:
            logger.error(f"Error in humanization process: {str(e)}")
            return f"Error processing text: {str(e)}"
|
170 |
+
|
171 |
+
# Initialize Gradio interface with improved UI
|
172 |
+
def create_interface():
    """Build and return the Gradio interface for the text humanizer.

    A single ``TextHumanizer`` is constructed here and shared by every
    request through the ``process_text`` closure.  The interface takes the
    input text, a writing style (one of the humanizer's tone names) and an
    optional target language, and shows the humanized text.  Example rows are
    cached because ``cache_examples=True`` is passed.
    """
    humanizer = TextHumanizer()

    def process_text(text: str, tone: str, translate_to: str) -> str:
        # Thin adapter from the three UI inputs to the humanizer call.
        return humanizer.humanize_text(text, tone, translate_to)

    # Input widgets, created in the order they appear in the UI.
    text_input = gr.Textbox(
        label="Input Text",
        placeholder="Paste your AI-generated text here...",
        lines=5,
    )
    style_choice = gr.Dropdown(
        choices=list(humanizer.tone_prompts.keys()),
        label="Writing Style",
        value="Casual",
        info="Select the desired writing style for the output",
    )
    language_choice = gr.Dropdown(
        choices=[
            "None",
            "da (Danish)",
            "no (Norwegian)",
            "sv (Swedish)",
            "es (Spanish)",
            "fr (French)",
            "de (German)",
        ],
        label="Translate to",
        value="None",
        info="Optional: translate the output to another language",
    )

    # Output widget.
    result_box = gr.Textbox(label="Humanized Output", lines=5)

    # One row per example: [input text, writing style, translation target].
    sample_rows = [
        [
            "The neural network processes information through multiple layers of interconnected nodes.",
            "Casual",
            "None",
        ],
        [
            "The implementation of artificial intelligence in healthcare systems has shown promising results.",
            "Business",
            "es (Spanish)",
        ],
    ]

    return gr.Interface(
        fn=process_text,
        inputs=[text_input, style_choice, language_choice],
        outputs=result_box,
        title="AI Text Humanizer",
        description="Convert AI-generated text into more natural, human-like writing",
        examples=sample_rows,
        cache_examples=True,
    )
|
221 |
+
|
222 |
+
if __name__ == "__main__":
    interface = create_interface()

    # Request queuing is enabled through queue(); the ``enable_queue``
    # argument of launch() is rejected by current Gradio releases.  The
    # ThreadPoolExecutor that used to wrap this block was never handed any
    # work, so it has been dropped.
    interface.queue()

    # Listen on all interfaces on port 7860 and request a public share link.
    interface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True
    )
|