Update app.py
app.py CHANGED
@@ -7,27 +7,24 @@ import nltk
 from nltk.tokenize import word_tokenize
 from nltk.corpus import stopwords
 from nltk.stem import WordNetLemmatizer
-from transformers import AutoTokenizer
 import nltk
+
+# Download NLTK resources
 nltk.download('punkt', download_dir='/root/nltk_data')
 nltk.download('stopwords', download_dir='/root/nltk_data')
 nltk.download('wordnet', download_dir='/root/nltk_data')
 
-
-
 # Initialize lemmatizer and stopwords
 lemmatizer = WordNetLemmatizer()
 stop_words = set(stopwords.words('english'))
 
-# Load fine-tuned model and tokenizer
-model_name = "TAgroup5/news-classification-model"
-model = AutoModelForSequenceClassification.from_pretrained(
-tokenizer = AutoTokenizer.from_pretrained(
+# Load fine-tuned model and tokenizer (adjust the model name)
+model_name = "TAgroup5/news-classification-model"  # Replace with the correct model name
+model = AutoModelForSequenceClassification.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 # Initialize pipelines
 text_classification_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)
-qa_model_name = "distilbert-base-uncased-distilled-squad"  # Example of a common Q&A model
-qa_pipeline = pipeline("question-answering", model=qa_model_name, tokenizer=qa_model_name)
 
 # Streamlit App
 st.title("News Classification and Q&A")
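The hunk above starts at line 7 of app.py, so the file's first six lines are not visible here. The diff references st, re, df, pipeline and AutoModelForSequenceClassification without showing their imports, and the new code still calls AutoTokenizer.from_pretrained even though this commit removes the explicit from transformers import AutoTokenizer line, so those imports presumably live in the unshown header. A sketch of what that header plausibly contains (assumed context, not part of the commit):

import re

import pandas as pd  # assumed: df is read from the uploaded file
import streamlit as st
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline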
@@ -50,7 +47,7 @@ if uploaded_file is not None:
     st.write("Preview of uploaded data:")
     st.dataframe(df.head())
 
-    # Preprocessing function
+    # Preprocessing function to clean the text
     def preprocess_text(text):
         text = text.lower()  # Convert to lowercase
         text = re.sub(r'[^a-z\s]', '', text)  # Remove special characters & numbers
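This hunk cuts off inside preprocess_text, so only the lowercasing and regex steps are visible. Given the word_tokenize, stopwords and WordNetLemmatizer imports at the top of the file, the remainder presumably tokenizes, drops stop words and lemmatizes; a minimal self-contained sketch of that pattern (the exact body in app.py may differ):

import re

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

nltk.download('punkt')      # newer NLTK releases may also need 'punkt_tab'
nltk.download('stopwords')
nltk.download('wordnet')

lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    text = text.lower()                   # convert to lowercase
    text = re.sub(r'[^a-z\s]', '', text)  # strip special characters and digits
    tokens = word_tokenize(text)                          # split into words
    tokens = [t for t in tokens if t not in stop_words]   # drop stop words
    return " ".join(lemmatizer.lemmatize(t) for t in tokens)  # lemmatize and rejoin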
@@ -61,6 +58,8 @@ if uploaded_file is not None:
 
     # Apply preprocessing and classification
     df['processed_content'] = df['content'].apply(preprocess_text)
+
+    # Classify each record into one of the five classes
     df['class'] = df['processed_content'].apply(lambda x: text_classification_pipeline(x)[0]['label'] if x.strip() else "Unknown")
 
     # Show results
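For reference on the lambda in this hunk: a transformers text-classification pipeline called on a single string returns a list with one dict per input, each holding 'label' and 'score', which is why the code indexes [0]['label']. A small illustration using a public sentiment model (the label names below come from that example model, not from TAgroup5/news-classification-model):

from transformers import pipeline

# Example model chosen only to show the output shape of the pipeline.
clf = pipeline("text-classification", model="distilbert-base-uncased-finetuned-sst-2-english")

result = clf("Markets rallied after the earnings report.")
print(result)              # e.g. [{'label': 'POSITIVE', 'score': 0.99}]
print(result[0]['label'])  # the string stored in df['class']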
@@ -80,6 +79,8 @@ question = st.text_input("Ask a question:")
 context = st.text_area("Provide the news article or content for the Q&A:", height=150)
 
 if question and context.strip():
+    qa_model_name = "distilbert-base-uncased-distilled-squad"  # Example of a common Q&A model
+    qa_pipeline = pipeline("question-answering", model=qa_model_name, tokenizer=qa_model_name)
     result = qa_pipeline(question=question, context=context)
 
     # Check if the result contains an answer
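Two notes on this last hunk. First, a question-answering pipeline returns a dict with 'answer', 'score', 'start' and 'end', which is what the "Check if the result contains an answer" branch presumably inspects. Second, because the commit builds qa_pipeline inside the if block, the SQuAD model is reloaded on every question; caching the pipeline once, for example with Streamlit's st.cache_resource, avoids that. A hedged sketch of both points (the decorator, sample inputs and confidence threshold are illustrative, not part of the commit):

import streamlit as st
from transformers import pipeline

@st.cache_resource  # load the Q&A model once per process instead of per question
def load_qa_pipeline(model_name="distilbert-base-uncased-distilled-squad"):
    return pipeline("question-answering", model=model_name, tokenizer=model_name)

qa_pipeline = load_qa_pipeline()
result = qa_pipeline(question="Who reported earnings?",
                     context="Acme Corp reported earnings on Monday.")
# result looks like {'score': 0.98, 'start': 0, 'end': 9, 'answer': 'Acme Corp'}
if result.get("answer") and result["score"] > 0.1:  # illustrative threshold
    st.write(f"Answer: {result['answer']}")
else:
    st.write("No confident answer found.")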