katsukiai commited on
Commit
ffd44b8
·
verified ·
1 Parent(s): dddf90e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -31
app.py CHANGED
@@ -11,53 +11,68 @@ from huggingface_hub import HfApi
11
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
12
  logger = logging.getLogger(__name__)
13
 
14
- # Load GPT-2 model and tokenizer
15
- MODEL_NAME = "gpt2"
16
- logger.info(f"Loading model: {MODEL_NAME} (CPU mode)")
17
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
18
- tokenizer.pad_token = tokenizer.eos_token # GPT-2 has no padding token, so reuse eos_token
19
- model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
20
-
21
- # Function to process text with GPT-2
22
- def process_text_with_model(text):
23
- logger.info("Processing text with GPT-2 model (CPU)...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
25
  outputs = model.generate(**inputs, max_length=200)
26
- processed_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
27
- return processed_text
28
 
29
  # Function to convert text to JSON
30
  def text_to_json(text):
31
- lines = text.strip().split("\n")
32
- data = [{"text": line} for line in lines]
33
  timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
34
  filename = f"output_{timestamp}.json"
35
-
36
  with open(filename, "w") as f:
37
- json.dump(data, f, indent=4)
38
-
39
  logger.info(f"JSON file created: {filename}")
40
  return filename
41
 
42
  # Function to generate JSON and upload to Hugging Face
43
- def generate_and_upload(text):
44
  try:
45
  if not text.strip():
46
  raise ValueError("Text input is empty.")
47
 
48
- logger.info(f"Received text input: {text}")
49
 
50
- # Process text with GPT-2
51
- processed_text = process_text_with_model(text)
52
  logger.info(f"Processed text: {processed_text}")
53
 
54
- # Convert processed text to JSON
55
  json_file = text_to_json(processed_text)
56
 
57
  # Get Hugging Face API token
58
  token = os.getenv("HUGGINGFACE_API_TOKEN")
59
  if not token:
60
- raise ValueError("Hugging Face API token not found. Please set HUGGINGFACE_API_TOKEN environment variable.")
61
 
62
  # Upload file to Hugging Face
63
  api = HfApi()
@@ -86,16 +101,17 @@ def generate_and_upload(text):
86
  with gr.Blocks() as demo:
87
  with gr.Tab("About"):
88
  gr.Markdown("""
89
- # Text Processor with GPT-2 (CPU)
90
- - Processes text with GPT-2 Transformer
91
- - Converts output to JSON
92
  - Uploads to Hugging Face
93
 
94
  ## Instructions:
95
- 1. Enter text in the "Generate" tab.
96
- 2. Click "Generate and Upload."
97
- 3. Download JSON if needed.
98
- 4. Check upload status.
 
99
 
100
  ## Requirements:
101
  - **Runs on CPU** (No GPU required).
@@ -103,6 +119,7 @@ with gr.Blocks() as demo:
103
  """)
104
 
105
  with gr.Tab("Generate"):
 
106
  text_input = gr.Textbox(label="Enter text")
107
  output_message = gr.Textbox(label="Status message")
108
  json_file_downloader = gr.File(label="Download JSON", interactive=True)
@@ -110,7 +127,7 @@ with gr.Blocks() as demo:
110
 
111
  generate_button.click(
112
  fn=generate_and_upload,
113
- inputs=text_input,
114
  outputs=[output_message, json_file_downloader]
115
  )
116
 
 
11
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
12
  logger = logging.getLogger(__name__)
13
 
14
# Catalogue of popular Hub checkpoints selectable in the UI.
# NOTE(review): the list holds 35 entries, not 37 as the previous comment
# claimed — keep the count out of the comment (use len(MODEL_LIST)) so it
# cannot drift again.
# NOTE(review): "google/flan-t5-large" is an encoder-decoder (T5) checkpoint
# and presumably cannot be loaded with AutoModelForCausalLM — confirm before
# keeping it in this list.
MODEL_LIST = [
    "gpt2", "gpt2-medium", "gpt2-large", "gpt2-xl",
    "facebook/opt-1.3b", "facebook/opt-2.7b", "facebook/opt-6.7b",
    "mistralai/Mistral-7B-Instruct-v0.1", "mistralai/Mixtral-8x7B-Instruct",
    "meta-llama/Llama-2-7b-chat-hf", "meta-llama/Llama-2-13b-chat-hf",
    "microsoft/DialoGPT-small", "microsoft/DialoGPT-medium", "microsoft/DialoGPT-large",
    "bigscience/bloom-560m", "bigscience/bloomz-560m",
    "EleutherAI/gpt-neo-125m", "EleutherAI/gpt-neo-1.3B", "EleutherAI/gpt-neo-2.7B",
    "EleutherAI/gpt-j-6B", "EleutherAI/gpt-neox-20b",
    "huggingfaceh4/starchat-alpha", "huggingfaceh4/zephyr-7b-alpha",
    "deepseek-ai/deepseek-coder-1.3b", "deepseek-ai/deepseek-coder-6.7b",
    "deepseek-ai/deepseek-v3", "databricks/dolly-v2-7b", "cerebras/Cerebras-GPT-1.3B",
    "tiiuae/falcon-7b-instruct", "tiiuae/falcon-40b-instruct",
    "google/gemma-2b", "google/gemma-7b", "google/flan-t5-large",
    "stabilityai/stablelm-tuned-alpha-7b", "stabilityai/stablelm-2-7b-chat"
]
31
+
32
# Cache of already-loaded (tokenizer, model) pairs keyed by model name.
# Reloading a multi-GB checkpoint on every request was the dominant cost
# of each generation; memoising makes repeat requests for a model free.
_model_cache = {}

# Function to load selected model
def load_model(model_name):
    """Return a ``(tokenizer, model)`` pair for *model_name*, loading once.

    The pair is memoised in ``_model_cache`` so subsequent calls for the
    same model reuse the already-loaded objects instead of fetching and
    re-instantiating the checkpoint from the Hub every time.
    """
    if model_name not in _model_cache:
        logger.info(f"Loading model: {model_name} (CPU mode)")
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        # Decoder-only models such as GPT-2 ship without a padding token;
        # reuse EOS so tokenizer(..., padding=True) does not raise.
        tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelForCausalLM.from_pretrained(model_name)
        _model_cache[model_name] = (tokenizer, model)
    return _model_cache[model_name]
39
+
40
# Function to process text with selected model
def process_text(model_name, text, max_length=200):
    """Generate a continuation of *text* with the selected model.

    Parameters:
        model_name: Hub id of the model to use (resolved via ``load_model``).
        text: prompt string to continue.
        max_length: total token budget for generation. Defaults to 200,
            matching the previously hard-coded value, so existing callers
            are unaffected.

    Returns:
        The decoded generation as a plain string (special tokens stripped).
    """
    tokenizer, model = load_model(model_name)
    logger.info(f"Processing text with {model_name}...")
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    outputs = model.generate(**inputs, max_length=max_length)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
47
 
48
# Function to convert text to JSON
def text_to_json(text):
    """Write *text* to a timestamped JSON file and return its filename.

    The file contains a single-element list ``[{"text": <text>}]`` so the
    on-disk schema stays what the uploader expects.

    Returns:
        The generated file name, e.g. ``output_20240101_120000.json``.
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"output_{timestamp}.json"
    # Explicit utf-8 keeps non-ASCII model output intact regardless of the
    # platform's default encoding.
    with open(filename, "w", encoding="utf-8") as f:
        json.dump([{"text": text}], f, indent=4)
    # Bug fix: the log line printed a scrubbed "(unknown)" placeholder
    # instead of the actual file name.
    logger.info(f"JSON file created: {filename}")
    return filename
56
 
57
  # Function to generate JSON and upload to Hugging Face
58
+ def generate_and_upload(model_name, text):
59
  try:
60
  if not text.strip():
61
  raise ValueError("Text input is empty.")
62
 
63
+ logger.info(f"Received text input for model {model_name}")
64
 
65
+ # Process text
66
+ processed_text = process_text(model_name, text)
67
  logger.info(f"Processed text: {processed_text}")
68
 
69
+ # Convert to JSON
70
  json_file = text_to_json(processed_text)
71
 
72
  # Get Hugging Face API token
73
  token = os.getenv("HUGGINGFACE_API_TOKEN")
74
  if not token:
75
+ raise ValueError("Hugging Face API token not found. Please set HUGGINGFACE_API_TOKEN.")
76
 
77
  # Upload file to Hugging Face
78
  api = HfApi()
 
101
  with gr.Blocks() as demo:
102
  with gr.Tab("About"):
103
  gr.Markdown("""
104
+ # Text Processor with Selectable Model (CPU)
105
+ - Choose from **37 popular transformer models**
106
+ - Processes text and converts to JSON
107
  - Uploads to Hugging Face
108
 
109
  ## Instructions:
110
+ 1. Select a model from the dropdown.
111
+ 2. Enter text in the "Generate" tab.
112
+ 3. Click "Generate and Upload."
113
+ 4. Download JSON if needed.
114
+ 5. Check upload status.
115
 
116
  ## Requirements:
117
  - **Runs on CPU** (No GPU required).
 
119
  """)
120
 
121
  with gr.Tab("Generate"):
122
+ model_selector = gr.Dropdown(choices=MODEL_LIST, value="gpt2", label="Choose Model")
123
  text_input = gr.Textbox(label="Enter text")
124
  output_message = gr.Textbox(label="Status message")
125
  json_file_downloader = gr.File(label="Download JSON", interactive=True)
 
127
 
128
  generate_button.click(
129
  fn=generate_and_upload,
130
+ inputs=[model_selector, text_input],
131
  outputs=[output_message, json_file_downloader]
132
  )
133