Segizu committed on
Commit 57d5dc6 · 1 Parent(s): e8e8a40

simple model

Files changed (3)
  1. .gitignore +1 -0
  2. app.py +138 -23
  3. requirements.txt +6 -4
.gitignore ADDED
@@ -0,0 +1 @@
+ .env
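The new `.env` entry presumably keeps secrets such as `HF_TOKEN` out of version control; the updated app.py reads that token from the environment. A minimal sketch of loading it during local development, assuming `python-dotenv` is available (it is not listed in requirements.txt); on Hugging Face Spaces the token would normally be provided as a repository secret instead:

```python
# Hypothetical local setup: load HF_TOKEN from a git-ignored .env file.
# Assumes python-dotenv is installed; this is illustration, not part of the commit.
import os

from dotenv import load_dotenv

load_dotenv()  # reads key=value pairs from .env into the process environment
token = os.environ.get("HF_TOKEN")
print("HF_TOKEN set:", token is not None)
```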
app.py CHANGED
@@ -1,25 +1,140 @@
- from transformers import pipeline
- import gradio as gr
  import torch

- # Load a Hugging Face text model (it can be completion, translation, etc.)
- dispositivo = 0 if torch.cuda.is_available() else -1
- modelo = pipeline("text-generation", model="microsoft/Phi-4-multimodal-instruct", device=dispositivo, trust_remote_code=True)  # Change the model if you wish
-
- # Function to generate text
- def generar_texto(prompt):
-     resultado = modelo(prompt, max_length=100, do_sample=True)
-     return resultado[0]["generated_text"]
-
- # Gradio interface
- interfaz = gr.Interface(
-     fn=generar_texto,
-     inputs=gr.Textbox(label="Introduce tu texto"),
-     outputs=gr.Textbox(label="Texto generado"),
-     title="Generador de Texto con Hugging Face",
-     description="Escribe un prompt y el modelo generará un texto basado en él."
- )
-
- # Run the app
- if __name__ == "__main__":
-     interfaz.launch()
+ import streamlit as st
  import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+ import os
+ from threading import Thread
+ import requests
+ import time
+
+ # Define model path for caching (avoids reloading on every app restart)
+ MODEL_PATH = "/mnt/data/Phi-4-Hindi"
+ TOKEN = os.environ.get("HF_TOKEN")
+ MODEL_NAME = "DrishtiSharma/Phi-4-Hindi-quantized"
+
+ # Load the model and tokenizer once
+ @st.cache_resource()
+ def load_model():
+     with st.spinner("Loading model... Please wait ⏳"):
+         try:
+             if not os.path.exists(MODEL_PATH):
+                 model = AutoModelForCausalLM.from_pretrained(
+                     MODEL_NAME, token=TOKEN, trust_remote_code=True, torch_dtype=torch.bfloat16
+                 )
+                 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=TOKEN)
+                 model.save_pretrained(MODEL_PATH)
+                 tokenizer.save_pretrained(MODEL_PATH)
+             else:
+                 model = AutoModelForCausalLM.from_pretrained(MODEL_PATH)
+                 tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
+         except requests.exceptions.ConnectionError:
+             st.error("⚠️ Connection error! Unable to download the model. Please check your internet connection and try again.")
+             return None, None
+         except requests.exceptions.ReadTimeout:
+             st.error("⚠️ Read Timeout! The request took too long. Please try again later.")
+             return None, None
+
+     return model, tokenizer
+
+ # Load the model and move it to the appropriate device
+ model, tok = load_model()
+ if model is None or tok is None:
+     st.stop()
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ try:
+     model = model.to(device)
+ except torch.cuda.OutOfMemoryError:
+     st.error("⚠️ CUDA Out of Memory! Running on CPU instead.")
+     device = torch.device("cpu")
+     model = model.to(device)
+
+ terminators = [tok.eos_token_id]
+
+ # Initialize session state if not set
+ if "chat_history" not in st.session_state:
+     st.session_state.chat_history = []
+
+ # Chat function
+ def chat(message, temperature, do_sample, max_tokens):
+     """Processes chat input and streams a response from the model."""
+
+     # Append the new user message to the history
+     st.session_state.chat_history.append({"role": "user", "content": message})
+
+     # Convert the chat history into the model's chat format
+     messages = tok.apply_chat_template(st.session_state.chat_history, tokenize=False, add_generation_prompt=True)
+     model_inputs = tok([messages], return_tensors="pt").to(device)
+
+     # Initialize the streamer for token-wise responses
+     streamer = TextIteratorStreamer(tok, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
+
+     # Define generation parameters
+     generate_kwargs = {
+         "inputs": model_inputs["input_ids"],
+         "streamer": streamer,
+         "max_new_tokens": max_tokens,
+         "do_sample": do_sample,
+         "temperature": temperature,
+         "eos_token_id": terminators,
+     }
+
+     if temperature == 0:
+         generate_kwargs["do_sample"] = False
+
+     # Generate the response asynchronously
+     t = Thread(target=model.generate, kwargs=generate_kwargs)
+     t.start()
+
+     # Collect the response as it streams
+     response_text = ""
+     for new_text in streamer:
+         response_text += new_text
+         yield response_text
+
+     # Save the assistant's response to the session history
+     st.session_state.chat_history.append({"role": "assistant", "content": response_text})
+
+ # UI setup
+ st.title("💬 Chat With Phi-4-Hindi")
+ st.success("✅ Model is READY to chat!")
+ st.markdown("Chat with [large-traversaal/Phi-4-Hindi](https://huggingface.co/large-traversaal/Phi-4-Hindi)")
+
+ # Sidebar chat settings
+ temperature = st.sidebar.slider("Temperature", 0.0, 1.0, 0.3, 0.1)
+ do_sample = st.sidebar.checkbox("Use Sampling", value=True)
+ max_tokens = st.sidebar.slider("Max Tokens", 128, 4096, 512, 1)
+ text_color = st.sidebar.selectbox("Text Color", ["Red", "Black", "Blue", "Green", "Purple"], index=0)
+ dark_mode = st.sidebar.checkbox("🌙 Dark Mode", value=False)
+
+ # Format a chat message as colored HTML
+ def get_html_text(text, color):
+     return f'<p style="color: {color.lower()}; font-size: 16px;">{text}</p>'
+
+ # Display the chat history
+ for msg in st.session_state.chat_history:
+     role = "👤" if msg["role"] == "user" else "🤖"
+     st.markdown(get_html_text(f"**{role}:** {msg['content']}", text_color if role == "🤖" else "black"), unsafe_allow_html=True)
+
+ # User input handling
+ user_input = st.text_input("Type your message:", "")
+
+ if st.button("Send"):
+     if user_input.strip():
+         # chat() appends the user message to the session history itself
+         with st.spinner("Generating response... 🤖💭"):
+             response_generator = chat(user_input, temperature, do_sample, max_tokens)
+             final_response = ""
+             for output in response_generator:
+                 final_response = output  # keep the latest streamed output
+
+         st.success("✅ Response generated!")
+         # The generator has already stored the response in session state
+         st.rerun()
+
+ if st.button("🧹 Clear Chat"):
+     with st.spinner("Clearing chat history..."):
+         st.session_state.chat_history = []
+     st.success("✅ Chat history cleared!")
+     st.rerun()
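The streaming in the new app.py works by running `model.generate` on a background thread while `TextIteratorStreamer` yields decoded text as tokens arrive, which the Streamlit generator consumes incrementally. A minimal standalone sketch of that pattern, using a small public model purely for illustration:

```python
# Minimal sketch of threaded streaming generation with transformers.
# "sshleifer/tiny-gpt2" is used here only as a tiny illustrative model.
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tok = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2")
model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")

inputs = tok("Hello, world", return_tensors="pt")
streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)

# generate() blocks until completion, so it runs on a worker thread;
# the streamer is consumed on the main thread as tokens are produced.
thread = Thread(target=model.generate,
                kwargs={"inputs": inputs["input_ids"],
                        "streamer": streamer,
                        "max_new_tokens": 20})
thread.start()

text = ""
for chunk in streamer:  # yields decoded text pieces as they become available
    text += chunk
thread.join()
print(text)
```

Because generation is blocking, the worker thread plus the iterator on the consumer side are what make token-by-token display possible in the chat UI.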
requirements.txt CHANGED
@@ -1,5 +1,7 @@
- gradio
+ spaces
  torch
- transformers
- requests
- peft
+ git+https://github.com/huggingface/transformers/
+ optimum
+ accelerate
+ bitsandbytes
+ scipy
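The updated requirements add `bitsandbytes`, `optimum`, and `accelerate`, and the checkpoint name in app.py ends in `-quantized`, which suggests quantized loading is intended even though the committed code loads the model in bfloat16. A minimal sketch of a 4-bit load with bitsandbytes, under the assumption that the checkpoint and a CUDA GPU support it:

```python
# Hypothetical 4-bit load with bitsandbytes (requires a CUDA GPU).
# The model name is taken from app.py; 4-bit support is an assumption.
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(
    "DrishtiSharma/Phi-4-Hindi-quantized", token=os.environ.get("HF_TOKEN")
)
model = AutoModelForCausalLM.from_pretrained(
    "DrishtiSharma/Phi-4-Hindi-quantized",
    token=os.environ.get("HF_TOKEN"),
    quantization_config=bnb_config,
    device_map="auto",  # needs accelerate
)
```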