Jhakx committed on
Commit
3ae75b1
·
verified ·
1 Parent(s): c541e70

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -0
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from datasets import load_dataset
3
+ from sklearn.feature_extraction.text import TfidfVectorizer
4
+ from sklearn.metrics.pairwise import cosine_similarity
5
+ import numpy as np
6
+
7
# Hub identifiers of every corpus the app searches, in load order.
_DATASET_IDS = (
    "aifeifei798/DPO_Pairs-Roleplay-NSFW",
    "Maxx0/sexting-nsfw-adultconten",
    "QuietImpostor/Claude-3-Opus-Claude-3.5-Sonnnet-9k",
    "HuggingFaceTB/everyday-conversations-llama3.1-2k",
    "Chadgpt-fam/sexting_dataset",
)

# Load each corpus once at import time; the list order matches _DATASET_IDS.
nsfw_datasets = [load_dataset(dataset_id) for dataset_id in _DATASET_IDS]
15
+
16
# Prepare all texts from datasets
def _collect_texts(datasets):
    """Gather the searchable strings from every split of every dataset.

    For each split, the ``text`` column is used when present, otherwise the
    ``content`` column; splits with neither column contribute nothing.

    Only non-blank ``str`` values are kept. Rows that are ``None`` or hold
    some other type (for example a list of chat messages) would make the
    TF-IDF vectorizer fail when it tries to tokenize them, and blank rows
    carry no signal for similarity search.

    Args:
        datasets: Iterable of split-name -> split mappings, each split
            exposing ``.features`` and column access by name (assumed to be
            what ``load_dataset`` returned -- confirm against caller).

    Returns:
        A flat list of strings, in dataset / split / row order.
    """
    texts = []
    for dataset in datasets:
        for split_data in dataset.values():
            columns = split_data.features
            if "text" in columns:
                column_name = "text"
            elif "content" in columns:
                column_name = "content"
            else:
                # No recognised text column in this split; skip it.
                continue
            texts.extend(
                value
                for value in split_data[column_name]
                if isinstance(value, str) and value.strip()
            )
    return texts


all_texts = _collect_texts(nsfw_datasets)
24
+
25
# Create TF-IDF vectorizer and index the whole corpus once at import time.
# Row i of tfidf_matrix corresponds to all_texts[i].
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(all_texts)

# Returned when the query cannot be matched against any corpus text.
_NO_MATCH_MESSAGE = "No matching text found."


def find_best_description(input_text):
    """Return the corpus text most similar to *input_text*.

    Similarity is the cosine similarity between the TF-IDF vector of the
    query and each row of ``tfidf_matrix``.

    Args:
        input_text: The query string. ``None``, empty and whitespace-only
            values are treated as "no query".

    Returns:
        The entry of ``all_texts`` with the highest similarity, or
        ``_NO_MATCH_MESSAGE`` when there is no query or the query shares no
        vocabulary with the corpus.
    """
    if not input_text or not input_text.strip():
        return _NO_MATCH_MESSAGE

    input_vector = vectorizer.transform([input_text])
    # cosine_similarity returns a (1, n_texts) array; take the single row
    # explicitly instead of relying on argmax flattening a 2-D array.
    scores = cosine_similarity(input_vector, tfidf_matrix)[0]
    best_index = int(np.argmax(scores))

    # When every score is zero, argmax yields index 0, which would present
    # an arbitrary corpus entry as the "best" match.
    if scores[best_index] <= 0.0:
        return _NO_MATCH_MESSAGE
    return all_texts[best_index]
34
+
35
def generate_text(input_text):
    """Callback for the UI: look up the closest corpus text for the input.

    Delegates entirely to ``find_best_description`` and returns its result
    unchanged.
    """
    best_match = find_best_description(input_text)
    return best_match
37
+
38
# Build the Gradio UI: one labelled text input, one plain-text output.
_query_box = gr.Textbox(label="Enter text to describe")
_interface_options = {
    "fn": generate_text,
    "inputs": _query_box,
    "outputs": "text",
    "title": "NSFW Text Descriptor",
    "description": "Enter text to find the best description from NSFW datasets.",
    "allow_flagging": "never",
}
iface = gr.Interface(**_interface_options)


def _main():
    """Start the Gradio server for the interface defined above."""
    iface.launch()


# Only launch when executed as a script, not when imported.
if __name__ == "__main__":
    _main()