import logging
import tempfile

import fitz  # PyMuPDF
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain_community.document_loaders import TextLoader

# --- Streamlit Config ---
st.set_page_config(page_title="📚 RAG PDF Chatbot", layout="wide")
st.title("📚 RAG-based PDF Chatbot")

# --- Logging ---
logging.basicConfig(level=logging.INFO)

# --- Load Model ---
@st.cache_resource
def load_model():
    # Load the LaMini-T5 model once and cache it across Streamlit reruns
    checkpoint = "MBZUAI/LaMini-T5-738M"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    pipe = pipeline("text2text-generation", model=model, tokenizer=tokenizer, max_length=512)
    return HuggingFacePipeline(pipeline=pipe)

# --- Extract PDF Text ---
def extract_text_from_pdf(file):
    try:
        with fitz.open(stream=file.read(), filetype="pdf") as doc:
            return "\n".join(page.get_text() for page in doc)
    except Exception as e:
        logging.error(f"Error reading PDF: {e}")
        return ""

# --- Create Chroma Vectorstore Safely ---
def create_vectorstore(documents, embeddings):
    temp_dir = tempfile.mkdtemp()  # unique, writable temp dir
    return Chroma.from_documents(documents, embedding=embeddings, persist_directory=temp_dir)

# --- Build RAG QA Chain ---
def build_qa_chain(retriever, llm):
    prompt_template = PromptTemplate(
        input_variables=["context", "question"],
        template="""You are a helpful assistant. Use the context below to answer the user's question as accurately and truthfully as possible.

Context: {context}

Question: {question}

Helpful Answer:""",
    )
    return RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        chain_type_kwargs={"prompt": prompt_template},
    )

# --- Process QA ---
def process_question(question, full_text):
    # Write the extracted PDF text to a temp file so TextLoader can read it
    with open("temp_text.txt", "w", encoding="utf-8") as f:
        f.write(full_text)

    loader = TextLoader("temp_text.txt", encoding="utf-8")
    docs = loader.load()

    # Split into overlapping chunks for retrieval
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
    chunks = text_splitter.split_documents(docs)

    # Embed the chunks and index them in Chroma
    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    vectorstore = create_vectorstore(chunks, embeddings)
    retriever = vectorstore.as_retriever()

    llm = load_model()
    qa = build_qa_chain(retriever, llm)
    return qa.run(question)

# --- Sidebar Upload ---
with st.sidebar:
    st.header("📄 Upload your PDF")
    uploaded_file = st.file_uploader("Upload PDF file", type=["pdf"])

# --- Main Logic ---
if uploaded_file:
    st.success(f"Uploaded: {uploaded_file.name}")
    full_text = extract_text_from_pdf(uploaded_file)

    if full_text:
        with st.expander("📄 View Extracted PDF Text", expanded=False):
            st.write(full_text[:3000] + ("..." if len(full_text) > 3000 else ""))

        st.subheader("💬 Ask Something")
        user_question = st.text_input("Ask a question about the document")

        if user_question:
            with st.spinner("Analyzing..."):
                try:
                    answer = process_question(user_question, full_text)
                except Exception as e:
                    logging.error(f"Question answering failed: {e}")
                    st.error("⚠️ Something went wrong. Try re-uploading the PDF.")
                    st.stop()
            st.markdown("### 🤖 Answer")
            st.write(answer)

        with st.sidebar:
            st.markdown("---")
            st.caption("💡 Sample Questions")
            st.markdown("""
            - "Summarize the document"
            - "What is the experience of Pradeep Singh Sengar?"
            - "What are the key points?"
            - "Explain in short"
            """)
    else:
        st.error("❌ Could not extract text. Try a different PDF.")
else:
    st.info("Upload a PDF to get started.")