|
import streamlit as st |
|
import nltk |
|
import spacy |
|
from nltk.tokenize import word_tokenize, sent_tokenize |
|
from nltk.corpus import stopwords |
|
from nltk.stem import PorterStemmer, WordNetLemmatizer |
|
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer |
|
|
|
|
|
# Fetch the NLTK corpora/models this app depends on.
# quiet=True keeps download progress out of the console on every Streamlit rerun.
# "punkt_tab" is required by word_tokenize/sent_tokenize on NLTK >= 3.8.2;
# on older NLTK versions an unknown resource id is reported but does not raise.
for _nltk_resource in ("punkt", "punkt_tab", "stopwords", "wordnet", "vader_lexicon"):
    nltk.download(_nltk_resource, quiet=True)
|
|
|
|
|
|
|
# Configure the page before any other st.* call (Streamlit requires this first).
# Fixed mojibake: the original emoji were mangled by an encoding round trip
# ("π€" is what the UTF-8 bytes of a robot emoji look like after an
# ISO-8859-7 decode).
st.set_page_config(page_title="NLP Basics", page_icon="🤖", layout="wide")

st.title("🤖 NLP Basics for Beginners")
|
|
|
# Intro blurb. The original also advertised NER, Sentiment Analysis, Text
# Summarization, and Word Cloud sections that the app never implements (only
# five tabs exist below) — the list now matches the features actually offered.
st.markdown("""
### Welcome to the NLP Basics App!

Explore core Natural Language Processing (NLP) concepts with interactive examples, including:

- **Tokenization** (Splitting text into words/sentences)
- **Stopword Removal** (Filtering common words)
- **Lemmatization & Stemming** (Word normalization)
- **Bag of Words (BoW) & TF-IDF** (Vectorization techniques)
""")
|
|
|
|
|
st.subheader("Enter Text to Analyze")
text_input = st.text_area("Type or paste some text here...", height=150)

# Nudge the user when the box is empty; the tabs below still render their
# static examples either way.
if not text_input.strip():
    st.warning("Please enter some text to explore NLP concepts.")

# One tab per NLP concept demonstrated by this app.
tabs = st.tabs([
    "Tokenization",
    "Stopwords",
    "Lemmatization & Stemming",
    "Bag of Words (BoW)",
    "TF-IDF",
])
|
|
|
|
|
with tabs[0]:
    # Emoji restored from mojibake "π€" (🔤 "input latin letters" fits
    # tokenization; the garbled bytes match several F0 9F .. A4 emoji).
    st.header("🔤 Tokenization")
    st.write("**Types of Tokenization:**")
    st.write("- **Sentence Tokenization**: Splitting text into sentences.")
    st.write("- **Word Tokenization**: Splitting text into individual words (tokens).")
    st.write("**Example Input**: \"I love NLP. It's amazing!\"")
    st.write("**Sentence Tokens**: [\"I love NLP.\", \"It's amazing!\"]")
    st.write("**Word Tokens**: [\"I\", \"love\", \"NLP\", \".\", \"It\", \"'s\", \"amazing\", \"!\"]")
    # Live demo runs only when the user actually typed something.
    if text_input.strip():
        st.write("**Sentence Tokenization**:", sent_tokenize(text_input))
        st.write("**Word Tokenization**:", word_tokenize(text_input))
|
|
|
|
|
with tabs[1]:
    # Emoji restored from mojibake "π" (best guess — original bytes were
    # dropped by the encoding corruption; TODO confirm intended emoji).
    st.header("🛑 Stopwords Removal")
    st.write("**Example Input**: \"This is an example of stopwords removal.\"")
    st.write("**Output**: [\"example\", \"stopwords\", \"removal\"]")
    if text_input.strip():
        stop_words = set(stopwords.words("english"))
        words = word_tokenize(text_input)
        # Drop stopwords AND bare punctuation tokens: without the isalnum()
        # check the output kept tokens like "." and "!", contradicting the
        # example shown above.
        filtered_words = [
            word for word in words
            if word.isalnum() and word.lower() not in stop_words
        ]
        st.write("**Filtered Words:**", filtered_words)
|
|
|
|
|
with tabs[2]:
    # Emoji restored from mojibake "π±" (🌱 = F0 9F 8C B1; ISO-8859-7 maps
    # F0→π and B1→±, matching the corrupted text exactly).
    st.header("🌱 Lemmatization & Stemming")
    st.write("**Example Input**: \"running studies\"")
    st.write("**Stemmed Output**: [\"run\", \"studi\"]")
    # Corrected example: WordNetLemmatizer defaults to noun POS, so
    # "running" is returned unchanged unless pos="v" is supplied; the
    # original text wrongly promised "run".
    st.write("**Lemmatized Output**: [\"running\", \"study\"]")
    if text_input.strip():
        words = word_tokenize(text_input)
        ps = PorterStemmer()
        stemmed_words = [ps.stem(word) for word in words]
        st.write("**Stemmed Words:**", stemmed_words)
        lemmatizer = WordNetLemmatizer()
        # Default POS is noun — verbs/adjectives are mostly left unchanged.
        lemmatized_words = [lemmatizer.lemmatize(word) for word in words]
        st.write("**Lemmatized Words:**", lemmatized_words)
|
|
|
|
|
with tabs[3]:
    # Emoji restored from mojibake "π¦" (📦 = F0 9F 93 A6; ISO-8859-7 maps
    # F0→π and A6→¦, matching the corrupted text exactly).
    st.header("📦 Bag of Words (BoW)")
    st.write("**Example Input**: \"I love NLP. NLP is great!\"")
    # Corrected example: CountVectorizer lowercases and its default
    # token_pattern ignores single-character tokens, so "I" never appears in
    # the vocabulary — the original example output was unreachable.
    st.write("**BoW Representation**: { 'great':1, 'is':1, 'love':1, 'nlp':2 }")
    if text_input.strip():
        vectorizer = CountVectorizer()
        X = vectorizer.fit_transform([text_input])
        st.write("**BoW Matrix:**", X.toarray())
        st.write("**Feature Names:**", vectorizer.get_feature_names_out())
|
|
|
|
|
with tabs[4]:
    # Emoji restored from mojibake "π" (📊 is a plausible original; the bytes
    # after F0 were dropped by the corruption — TODO confirm intended emoji).
    st.header("📊 TF-IDF (Term Frequency-Inverse Document Frequency)")
    st.write("**Example Input**: \"I love NLP. NLP is great!\"")
    st.write("**TF-IDF Representation**: Higher weights for rare but important words.")
    if text_input.strip():
        tfidf_vectorizer = TfidfVectorizer()
        # NOTE: with a single document every term has the same IDF, so the
        # weights reduce to normalized term frequencies — fine for a demo.
        tfidf_matrix = tfidf_vectorizer.fit_transform([text_input])
        st.write("**TF-IDF Matrix:**", tfidf_matrix.toarray())
        st.write("**Feature Names:**", tfidf_vectorizer.get_feature_names_out())
|
|
|
|
|
|
|
st.markdown("---")

# Fixed mojibake: "Β©" is the copyright sign © (UTF-8 C2 A9) after an
# ISO-8859-7 decode (C2→Β, A9→©).
st.markdown("""
<center>
<p style='font-size:14px;'>© 2024 NLP Basics App. All Rights Reserved.</p>
</center>
""", unsafe_allow_html=True)
|
|