Rajesh3338 committed · Commit cba278b · verified · 1 Parent(s): 42ebb52

Update app.py

Files changed (1)
  1. app.py +144 -86
app.py CHANGED
@@ -1,92 +1,150 @@
-from transformers import MllamaForConditionalGeneration, AutoProcessor, TextIteratorStreamer
-from PIL import Image
-import requests
-import torch
-from threading import Thread
 import gradio as gr
-from gradio import FileData
-import time
-import spaces
-ckpt = "mrcuddle/llama3.2-11B-Vision_instruct-Coder"
-model = MllamaForConditionalGeneration.from_pretrained(ckpt,
-                                                       torch_dtype=torch.bfloat16).to("cuda")
-processor = AutoProcessor.from_pretrained(ckpt)
-
-
-@spaces.GPU
-def bot_streaming(message, history, max_new_tokens=250):
-
-    txt = message["text"]
-    ext_buffer = f"{txt}"
-
-    messages = []
-    images = []
-
-
-    for i, msg in enumerate(history):
-        if isinstance(msg[0], tuple):
-            messages.append({"role": "user", "content": [{"type": "text", "text": history[i+1][0]}, {"type": "image"}]})
-            messages.append({"role": "assistant", "content": [{"type": "text", "text": history[i+1][1]}]})
-            images.append(Image.open(msg[0][0]).convert("RGB"))
-        elif isinstance(history[i-1], tuple) and isinstance(msg[0], str):
-            # messages are already handled
-            pass
-        elif isinstance(history[i-1][0], str) and isinstance(msg[0], str):  # text only turn
-            messages.append({"role": "user", "content": [{"type": "text", "text": msg[0]}]})
-            messages.append({"role": "assistant", "content": [{"type": "text", "text": msg[1]}]})
-
-    # add current message
-    if len(message["files"]) == 1:
-
-        if isinstance(message["files"][0], str):  # examples
-            image = Image.open(message["files"][0]).convert("RGB")
-        else:  # regular input
-            image = Image.open(message["files"][0]["path"]).convert("RGB")
-        images.append(image)
-        messages.append({"role": "user", "content": [{"type": "text", "text": txt}, {"type": "image"}]})
-    else:
-        messages.append({"role": "user", "content": [{"type": "text", "text": txt}]})
-
-
-    texts = processor.apply_chat_template(messages, add_generation_prompt=True)
-
-    if images == []:
-        inputs = processor(text=texts, return_tensors="pt").to("cuda")
-    else:
-        inputs = processor(text=texts, images=images, return_tensors="pt").to("cuda")
-    streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
-
-    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
-    generated_text = ""
-
-    thread = Thread(target=model.generate, kwargs=generation_kwargs)
-    thread.start()
-    buffer = ""
-
-    for new_text in streamer:
-        buffer += new_text
-        generated_text_without_prompt = buffer
-        time.sleep(0.01)
-        yield buffer
-
-
-demo = gr.ChatInterface(fn=bot_streaming, title="Multimodal Llama", examples=[
-    [{"text": "Replicate this webpage using Typescript and ChakraUI.", "files": ["./examples/Untitled.png"]},
-     2000],
-    ],
-    textbox=gr.MultimodalTextbox(),
-    additional_inputs=[gr.Slider(
-        minimum=10,
-        maximum=2500,
-        value=500,
-        step=10,
-        label="Maximum number of new tokens to generate",
-    )
-    ],
-    cache_examples=False,
-    description="Yes, this space can replicate (to the model's best ability) a webpage in your preferred language.",
-    stop_btn="Stop Generation",
-    fill_height=True,
-    multimodal=True)
-
-demo.launch(debug=True)
+import os
 import gradio as gr
+import torch
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_community.document_loaders import TextLoader
+from langchain_community.vectorstores import FAISS
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.chains import RetrievalQA
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+from langchain_huggingface import HuggingFacePipeline
+
+# Configure GPU settings
+os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using device: {device}")
+
+class CPSLChatbot:
+    def __init__(self):
+        self.initialize_components()
+
+    def initialize_components(self):
+        try:
+            # Load and process document
+            doc_loader = TextLoader("dataset.txt")
+            docs = doc_loader.load()
+            text_splitter = RecursiveCharacterTextSplitter(
+                chunk_size=1000,
+                chunk_overlap=100
+            )
+            split_docs = text_splitter.split_documents(docs)
+
+            # Initialize embeddings and vector store
+            self.embeddings = HuggingFaceEmbeddings(
+                model_name="all-MiniLM-L6-v2",
+                model_kwargs={'device': device}
+            )
+            self.vectordb = FAISS.from_documents(split_docs, self.embeddings)
+
+            # Load model and tokenizer
+            model_name = "01-ai/Yi-Coder-9B-Chat"
+            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+            self.model = AutoModelForCausalLM.from_pretrained(
+                model_name,
+                device_map="auto",
+                torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+                trust_remote_code=True
+            )
+
+            # Set up QA pipeline
+            self.qa_pipeline = pipeline(
+                "text-generation",
+                model=self.model,
+                tokenizer=self.tokenizer,
+                max_new_tokens=750,
+                pad_token_id=self.tokenizer.eos_token_id,
+                device=0 if device == "cuda" else -1
+            )
+
+            # Initialize LangChain components
+            llm = HuggingFacePipeline(pipeline=self.qa_pipeline)
+            retriever = self.vectordb.as_retriever(search_kwargs={"k": 5})
+            self.qa_chain = RetrievalQA.from_chain_type(
+                retriever=retriever,
+                chain_type="stuff",
+                llm=llm,
+                return_source_documents=False
+            )
+            print("Initialization completed successfully")
+
+        except Exception as e:
+            print(f"Initialization error: {str(e)}")
+            raise
+
+    def preprocess_query(self, query):
+        if "script" in query.lower() or "code" in query.lower():
+            return f"Write a CPSL script: {query}"
+        return query
+
+    def clean_response(self, response):
+        result = response.get("result", "")
+        if "Answer:" in result:
+            return result.split("Answer:")[1].strip()
+        return result.strip()
+
+    def get_response(self, user_input):
+        try:
+            processed_query = self.preprocess_query(user_input)
+            raw_response = self.qa_chain.invoke({"query": processed_query})
+            return self.clean_response(raw_response)
+        except Exception as e:
+            return f"Error processing query: {str(e)}"
+
+def create_gradio_interface():
+    chatbot = CPSLChatbot()
+
+    with gr.Blocks(title="CPSL Chatbot") as chat_interface:
+        gr.Markdown("# CPSL Chatbot with GPU Support")
+        gr.Markdown("Using Yi-Coder-9B-Chat model for CPSL script generation and queries")
+
+        chat_history = gr.Chatbot(
+            value=[],
+            elem_id="chatbot",
+            height=600
+        )
+
+        with gr.Row():
+            user_input = gr.Textbox(
+                label="Your Message:",
+                placeholder="Type your message here...",
+                show_label=True,
+                elem_id="user-input"
+            )
+            send_button = gr.Button("Send", variant="primary")
+
+        def chat_response(user_message, history):
+            if not user_message:
+                return history, history
+
+            bot_response = chatbot.get_response(user_message)
+            history.append((user_message, bot_response))
+            return history, history
+
+        send_button.click(
+            chat_response,
+            inputs=[user_input, chat_history],
+            outputs=[chat_history, chat_history],
+            api_name="chat"
+        )
+
+        # Clear the input textbox after sending
+        send_button.click(lambda: "", None, user_input)
+
+        # Also allow Enter key to send message
+        user_input.submit(
+            chat_response,
+            inputs=[user_input, chat_history],
+            outputs=[chat_history, chat_history],
+        )
+        user_input.submit(lambda: "", None, user_input)
+
+    return chat_interface
+
+if __name__ == "__main__":
+    interface = create_gradio_interface()
+    interface.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=True,
+        enable_queue=True
+    )
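
The commit only updates app.py, so the Space's dependency pins are not part of this diff. A minimal requirements sketch that would plausibly cover the new imports (package names and the Gradio pin below are assumptions, not taken from the repo; note that launch(enable_queue=True) is only accepted by Gradio 3.x, since Gradio 4 moved queuing to Blocks.queue()):

# hypothetical requirements.txt -- not included in this commit
gradio<4
torch
transformers
accelerate            # needed for device_map="auto"
sentence-transformers  # backs HuggingFaceEmbeddings("all-MiniLM-L6-v2")
faiss-cpu              # FAISS backend for the vector store
langchain
langchain-community
langchain-huggingface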
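
The prompt and response shaping can be sanity-checked without loading Yi-Coder-9B-Chat; the sketch below is a standalone copy of the two string helpers from CPSLChatbot (only the sample strings are made up for illustration):

# Standalone illustration of the query/response shaping used by CPSLChatbot.
def preprocess_query(query):
    # Requests mentioning "script" or "code" are reframed as CPSL script requests.
    if "script" in query.lower() or "code" in query.lower():
        return f"Write a CPSL script: {query}"
    return query

def clean_response(response):
    # RetrievalQA.invoke() returns a dict; only the text after "Answer:" is kept.
    result = response.get("result", "")
    if "Answer:" in result:
        return result.split("Answer:")[1].strip()
    return result.strip()

print(preprocess_query("Show me a sample script"))
# -> Write a CPSL script: Show me a sample script
print(clean_response({"result": "Some retrieved context\nAnswer: Here is the requested script."}))
# -> Here is the requested script.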