irmchek committed on
Commit 462fea8 · 1 Parent(s): 3658694

prototype notebook summarizer

Files changed (5)
  1. my_notebook.json +173 -0
  2. notebook_enhancer.py +99 -47
  3. requirements.txt +178 -29
  4. test.ipynb +156 -0
  5. test.json +0 -0
my_notebook.json ADDED
@@ -0,0 +1,173 @@
+ {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "id": 1,
+ "source": [
+ "# Data Science Analysis Notebook\n",
+ "\n",
+ "This notebook contains some example Python code for data analysis."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "id": 2,
+ "source": [
+ "# Import libraries\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sns\n",
+ "\n",
+ "# Set visualization style\n",
+ "sns.set(style='whitegrid')\n",
+ "%matplotlib inline"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "id": 3,
+ "source": [
+ "# Load the dataset\n",
+ "df = pd.read_csv('housing_data.csv')\n",
+ "\n",
+ "# Display basic information\n",
+ "print(f\"Dataset shape: {df.shape}\")\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "id": 4,
+ "source": [
+ "# Perform data cleaning\n",
+ "# Fill missing values with median\n",
+ "for column in df.columns:\n",
+ "    if df[column].dtype in ['float64', 'int64']:\n",
+ "        df[column].fillna(df[column].median(), inplace=True)\n",
+ "    else:\n",
+ "        df[column].fillna(df[column].mode()[0], inplace=True)\n",
+ "\n",
+ "# Check for remaining missing values\n",
+ "print(\"Missing values after cleaning:\")\n",
+ "print(df.isnull().sum())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "id": 5,
+ "source": [
+ "# Exploratory data analysis\n",
+ "# Create correlation matrix\n",
+ "numeric_columns = df.select_dtypes(include=['float64', 'int64']).columns\n",
+ "correlation_matrix = df[numeric_columns].corr()\n",
+ "\n",
+ "# Plot heatmap\n",
+ "plt.figure(figsize=(12, 10))\n",
+ "sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', linewidths=0.5)\n",
+ "plt.title('Correlation Matrix of Numeric Features', fontsize=18)\n",
+ "plt.xticks(rotation=45, ha='right')\n",
+ "plt.tight_layout()\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "id": 6,
+ "source": [
+ "# Feature engineering\n",
+ "# Create new features\n",
+ "if 'bedrooms' in df.columns and 'total_rooms' in df.columns:\n",
+ "    df['bedrooms_ratio'] = df['bedrooms'] / df['total_rooms']\n",
+ "\n",
+ "if 'total_rooms' in df.columns and 'households' in df.columns:\n",
+ "    df['rooms_per_household'] = df['total_rooms'] / df['households']\n",
+ "\n",
+ "# Scale numeric features\n",
+ "from sklearn.preprocessing import StandardScaler\n",
+ "scaler = StandardScaler()\n",
+ "df[numeric_columns] = scaler.fit_transform(df[numeric_columns])\n",
+ "\n",
+ "# Display transformed data\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "id": 7,
+ "source": [
+ "# Build a simple prediction model\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.linear_model import LinearRegression\n",
+ "from sklearn.metrics import mean_squared_error, r2_score\n",
+ "\n",
+ "# Assume we're predicting median_house_value\n",
+ "if 'median_house_value' in df.columns:\n",
+ "    # Prepare features and target\n",
+ "    X = df.drop('median_house_value', axis=1)\n",
+ "    y = df['median_house_value']\n",
+ "    \n",
+ "    # Split the data\n",
+ "    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
+ "    \n",
+ "    # Train the model\n",
+ "    model = LinearRegression()\n",
+ "    model.fit(X_train, y_train)\n",
+ "    \n",
+ "    # Make predictions\n",
+ "    y_pred = model.predict(X_test)\n",
+ "    \n",
+ "    # Evaluate the model\n",
+ "    mse = mean_squared_error(y_test, y_pred)\n",
+ "    r2 = r2_score(y_test, y_pred)\n",
+ "    \n",
+ "    print(f\"Mean Squared Error: {mse:.2f}\")\n",
+ "    print(f\"R² Score: {r2:.2f}\")\n",
+ "    \n",
+ "    # Plot actual vs predicted values\n",
+ "    plt.figure(figsize=(10, 6))\n",
+ "    plt.scatter(y_test, y_pred, alpha=0.5)\n",
+ "    plt.plot([y.min(), y.max()], [y.min(), y.max()], 'r--')\n",
+ "    plt.xlabel('Actual Values')\n",
+ "    plt.ylabel('Predicted Values')\n",
+ "    plt.title('Actual vs Predicted Values')\n",
+ "    plt.tight_layout()\n",
+ "    plt.show()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.10"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+ }
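For reference, a minimal sketch of loading this sample notebook with nbformat, the same call path process_notebook below uses; the cell count and types come straight from the JSON above:

import nbformat

# Read the committed sample notebook; as_version=4 normalizes it to nbformat 4.
with open("my_notebook.json", "r", encoding="utf-8") as f:
    nb = nbformat.read(f, as_version=4)

print(len(nb.cells))          # 7 cells: 1 markdown cell followed by 6 code cells
print(nb.cells[0].cell_type)  # "markdown"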
notebook_enhancer.py CHANGED
@@ -1,90 +1,140 @@
import nbformat
+ import spacy
import gradio as gr
from transformers import pipeline
+ from tokenize import tokenize
+ from transformers import (
+     AutoModelForSeq2SeqLM,
+     AutoTokenizer,
+     AutoConfig,
+     pipeline,
+     SummarizationPipeline,
+ )
+ import re
+
+ MODEL_NAME = "sagard21/python-code-explainer"


class NotebookEnhancer:
    def __init__(self):
-         # Initialize Hugging Face models
-         self.title_generator = pipeline(
-             "summarization", model="facebook/bart-large-cnn"
-         )
-         self.summary_generator = pipeline(
-             "summarization", model="sshleifer/distilbart-cnn-12-6"
+         self.config = AutoConfig.from_pretrained(MODEL_NAME)
+         self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, padding=True)
+         self.model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
+         self.model.eval()
+         self.pipeline = pipeline(
+             "summarization",
+             model=MODEL_NAME,
+             config=self.config,
+             tokenizer=self.tokenizer,
        )
+         self.nlp = spacy.load("en_core_web_sm")

    def generate_title(self, code):
        """Generate a concise title for a code cell"""
        # Limit input length to match model constraints
-         max_length = 1024
+         max_length = len(code) // 2
+         print("Title Max length", max_length)
+
        truncated_code = code[:max_length] if len(code) > max_length else code
+         max_length = len(truncated_code) // 2
+         title = self.pipeline(code, min_length=5, max_length=30)[0][
+             "summary_text"
+         ].strip()

-         result = self.title_generator(
-             truncated_code, max_length=10, min_length=3, do_sample=False
-         )
-         title = result[0]["summary_text"].strip()
+         print("Result title", title)
        # Format as a markdown title
-         return f"## {title.capitalize()}"
+         return f"# {title.capitalize()}"
+
+     def _count_num_words(self, code):
+         words = code.split(" ")
+         return len(words)

    def generate_summary(self, code):
        """Generate a detailed summary for a code cell"""
-         # Limit input length to match model constraints
-         max_length = 1024
-         truncated_code = code[:max_length] if len(code) > max_length else code
-
-         result = self.summary_generator(
-             truncated_code, max_length=100, min_length=30, do_sample=True
-         )
-         return result[0]["summary_text"].strip()
-
-     def enhance_notebook(self, notebook_content):
+         # result = self.pipeline([code], min_length=3, max_length=len(code // 2))
+         print("Code", code)
+         result = self.pipeline(code, min_length=5, max_length=30)
+         print(result)
+         summary = result[0]["summary_text"].strip()
+         summary = self._postprocess_summary(summary)
+         print("Result summary", summary)
+         # print(self._is_valid_sentence_nlp(summary))
+         # summary = result[0]["summary_text"].strip()
+         return f"{summary}"
+
+     def enhance_notebook(self, notebook: nbformat.notebooknode.NotebookNode):
        """Add title and summary markdown cells before each code cell"""
-         # Load the notebook
-         notebook = nbformat.reads(notebook_content, as_version=4)
-
        # Create a new notebook
        enhanced_notebook = nbformat.v4.new_notebook()
        enhanced_notebook.metadata = notebook.metadata
-
+         print(len(notebook.cells))
        # Process each cell
        i = 0
+         id = len(notebook.cells) + 1
        while i < len(notebook.cells):
            cell = notebook.cells[i]
-
            # For code cells, add title and summary markdown cells
            if cell.cell_type == "code" and cell.source.strip():
-                 # Generate title
-                 title = self.generate_title(cell.source)
-                 title_cell = nbformat.v4.new_markdown_cell(title)
-                 enhanced_notebook.cells.append(title_cell)
-
                # Generate summary
                summary = self.generate_summary(cell.source)
                summary_cell = nbformat.v4.new_markdown_cell(summary)
+                 summary_cell.outputs = []
+                 summary_cell.id = id
+                 id += 1
+
+                 # Generate title based on the summary cell
+                 title = self.generate_title(summary)
+                 title_cell = nbformat.v4.new_markdown_cell(title)
+                 title_cell.outputs = []
+                 title_cell.id = id
+                 id += 1
+
+                 enhanced_notebook.cells.append(title_cell)
                enhanced_notebook.cells.append(summary_cell)

            # Add the original cell
+             cell.outputs = []
            enhanced_notebook.cells.append(cell)
            i += 1
-
-         # Convert back to string
-         return nbformat.writes(enhanced_notebook)
-
-
- def process_notebook(file):
+         return enhanced_notebook
+
+     def is_valid(self, words: list[str]):
+         has_noun = False
+         has_verb = False
+         for word in words:
+             if word.pos_ in ["NOUN", "PROPN", "PRON"]:
+                 has_noun = True
+             if word.pos_ == "VERB":
+                 has_verb = True
+         return has_noun and has_verb
+
+     def _postprocess_summary(self, summary: str):
+         doc = self.nlp(summary)
+         sentences = list(doc.sents)
+         # ignore the first sentence
+         sentences = sentences[1:]
+         # remove the trailing list enumeration
+         postprocessed_sentences = []
+         for sentence in sentences:
+             if self.is_valid(sentence):
+                 postprocessed_sentences.append(sentence.text)
+         return " ".join(postprocessed_sentences)
+
+
+ def process_notebook(file_path):
    """Process an uploaded notebook file"""
    enhancer = NotebookEnhancer()
-
-     # Read uploaded file
-     notebook_content = file.decode("utf-8")
-
+     nb = None
+     with open(file_path, "r", encoding="utf-8") as f:
+         nb = nbformat.read(f, as_version=4)
    # Process the notebook
-     enhanced_notebook = enhancer.enhance_notebook(notebook_content)
-
+     enhanced_notebook = enhancer.enhance_notebook(nb)
+     print(enhanced_notebook)
+     enhanced_notebook_str = nbformat.writes(enhanced_notebook, version=4)
    # Save to temp file
    output_path = "enhanced_notebook.ipynb"
    with open(output_path, "w", encoding="utf-8") as f:
-         f.write(enhanced_notebook)
+         f.write(enhanced_notebook_str)

    return output_path

@@ -118,5 +168,7 @@ def build_gradio_interface():

# This will be the entry point when running the script
if __name__ == "__main__":
-     demo = build_gradio_interface()
-     demo.launch()
+     file_input = "my_notebook.json"
+     test = process_notebook(file_input)
+     # demo = build_gradio_interface()
+     # demo.launch()
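A minimal sketch of the new summarization path in isolation, mirroring the pipeline call in generate_summary above; it assumes the sagard21/python-code-explainer checkpoint is reachable and downloads on first use:

from transformers import pipeline

# Same call pattern as NotebookEnhancer.generate_summary in the diff above.
summarizer = pipeline("summarization", model="sagard21/python-code-explainer")

cell_source = "df = pd.read_csv('housing_data.csv')\nprint(df.shape)\ndf.head()"
result = summarizer(cell_source, min_length=5, max_length=30)
print(result[0]["summary_text"].strip())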
requirements.txt CHANGED
@@ -1,29 +1,178 @@
- # Core dependencies
- nbformat>=5.1.3
- gradio>=3.32.0
- transformers>=4.26.0
- torch>=1.13.1
- accelerate>=0.16.0
-
- # NLP model dependencies
- sentencepiece>=0.1.97
- protobuf>=3.20.0
-
- # Notebook dependencies
- jupyterlab>=3.5.0
- ipykernel>=6.21.0
-
- # Visualization (for example notebooks)
- matplotlib>=3.6.0
- seaborn>=0.12.0
- pandas>=1.5.0
- numpy>=1.23.0
-
- # ML components (for example notebooks)
- scikit-learn>=1.2.0
-
- # Development tools
- pytest>=7.2.0
- black>=23.1.0
- flake8>=6.0.0
- isort>=5.12.0
+ accelerate==1.6.0
+ aiofiles==24.1.0
+ annotated-types==0.7.0
+ anyio==4.9.0
+ appnope==0.1.4
+ argon2-cffi==23.1.0
+ argon2-cffi-bindings==21.2.0
+ arrow==1.3.0
+ asttokens==3.0.0
+ async-lru==2.0.5
+ attrs==25.3.0
+ babel==2.17.0
+ beautifulsoup4==4.13.4
+ black==25.1.0
+ bleach==6.2.0
+ blis==1.3.0
+ catalogue==2.0.10
+ certifi==2025.1.31
+ cffi==1.17.1
+ charset-normalizer==3.4.1
+ click==8.1.8
+ cloudpathlib==0.21.0
+ comm==0.2.2
+ confection==0.1.5
+ contourpy==1.3.2
+ cycler==0.12.1
+ cymem==2.0.11
+ debugpy==1.8.14
+ decorator==5.2.1
+ defusedxml==0.7.1
+ executing==2.2.0
+ fastapi==0.115.12
+ fastjsonschema==2.21.1
+ ffmpy==0.5.0
+ filelock==3.18.0
+ flake8==7.2.0
+ fonttools==4.57.0
+ fqdn==1.5.1
+ fsspec==2025.3.2
+ gradio==5.25.2
+ gradio_client==1.8.0
+ groovy==0.1.2
+ h11==0.14.0
+ httpcore==1.0.8
+ httpx==0.28.1
+ huggingface-hub==0.30.2
+ idna==3.10
+ iniconfig==2.1.0
+ ipykernel==6.29.5
+ ipython==9.1.0
+ ipython_pygments_lexers==1.1.1
+ isoduration==20.11.0
+ isort==6.0.1
+ jedi==0.19.2
+ Jinja2==3.1.6
+ joblib==1.4.2
+ json5==0.12.0
+ jsonpointer==3.0.0
+ jsonschema==4.23.0
+ jsonschema-specifications==2024.10.1
+ jupyter-events==0.12.0
+ jupyter-lsp==2.2.5
+ jupyter_client==8.6.3
+ jupyter_core==5.7.2
+ jupyter_server==2.15.0
+ jupyter_server_terminals==0.5.3
+ jupyterlab==4.4.0
+ jupyterlab_pygments==0.3.0
+ jupyterlab_server==2.27.3
+ kiwisolver==1.4.8
+ langcodes==3.5.0
+ language_data==1.3.0
+ marisa-trie==1.2.1
+ markdown-it-py==3.0.0
+ MarkupSafe==3.0.2
+ matplotlib==3.10.1
+ matplotlib-inline==0.1.7
+ mccabe==0.7.0
+ mdurl==0.1.2
+ mistune==3.1.3
+ mpmath==1.3.0
+ murmurhash==1.0.12
+ mypy-extensions==1.0.0
+ nbclient==0.10.2
+ nbconvert==7.16.6
+ nbformat==5.10.4
+ nest-asyncio==1.6.0
+ networkx==3.4.2
+ notebook_shim==0.2.4
+ numpy==2.2.4
+ orjson==3.10.16
+ overrides==7.7.0
+ packaging==24.2
+ pandas==2.2.3
+ pandocfilters==1.5.1
+ parso==0.8.4
+ pathspec==0.12.1
+ pexpect==4.9.0
+ pillow==11.2.1
+ platformdirs==4.3.7
+ pluggy==1.5.0
+ preshed==3.0.9
+ prometheus_client==0.21.1
+ prompt_toolkit==3.0.51
+ protobuf==6.30.2
+ psutil==7.0.0
+ ptyprocess==0.7.0
+ pure_eval==0.2.3
+ pycodestyle==2.13.0
+ pycparser==2.22
+ pydantic==2.11.3
+ pydantic_core==2.33.1
+ pydub==0.25.1
+ pyflakes==3.3.2
+ Pygments==2.19.1
+ pyparsing==3.2.3
+ pytest==8.3.5
+ python-dateutil==2.9.0.post0
+ python-json-logger==3.3.0
+ python-multipart==0.0.20
+ pytz==2025.2
+ PyYAML==6.0.2
+ pyzmq==26.4.0
+ referencing==0.36.2
+ regex==2024.11.6
+ requests==2.32.3
+ rfc3339-validator==0.1.4
+ rfc3986-validator==0.1.1
+ rich==14.0.0
+ rpds-py==0.24.0
+ ruff==0.11.5
+ safehttpx==0.1.6
+ safetensors==0.5.3
+ scikit-learn==1.6.1
+ scipy==1.15.2
+ seaborn==0.13.2
+ semantic-version==2.10.0
+ Send2Trash==1.8.3
+ sentencepiece==0.2.0
+ shellingham==1.5.4
+ six==1.17.0
+ smart-open==7.1.0
+ sniffio==1.3.1
+ soupsieve==2.6
+ spacy==3.8.5
+ spacy-legacy==3.0.12
+ spacy-loggers==1.0.5
+ srsly==2.5.1
+ stack-data==0.6.3
+ starlette==0.46.2
+ sympy==1.13.1
+ terminado==0.18.1
+ thinc==8.3.6
+ threadpoolctl==3.6.0
+ tinycss2==1.4.0
+ tokenizers==0.21.1
+ tomlkit==0.13.2
+ torch==2.6.0
+ tornado==6.4.2
+ tqdm==4.67.1
+ traitlets==5.14.3
+ transformers==4.51.3
+ typer==0.15.2
+ types-python-dateutil==2.9.0.20241206
+ typing-inspection==0.4.0
+ typing_extensions==4.13.2
+ tzdata==2025.2
+ uri-template==1.3.0
+ urllib3==2.4.0
+ uvicorn==0.34.1
+ wasabi==1.1.3
+ wcwidth==0.2.13
+ weasel==0.4.1
+ webcolors==24.11.1
+ webencodings==0.5.1
+ websocket-client==1.8.0
+ websockets==15.0.1
+ wrapt==1.17.2
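Note that spacy is pinned above, but the en_core_web_sm model that notebook_enhancer.py loads is not a pip requirement. A setup sketch; the model-download step is an assumption about the intended workflow, since the commit itself does not add it:

import subprocess, sys

# Install the pinned dependencies, then fetch the spaCy model used by
# NotebookEnhancer (spacy.load("en_core_web_sm") fails without this step).
subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"])
subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])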
test.ipynb ADDED
@@ -0,0 +1,156 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from tokenize import tokenize\n",
+ "from io import BytesIO\n",
+ "\n",
+ "code = \"\"\"import nltk\n",
+ " from nltk.stem import PorterStemmer\n",
+ " porter_stemmer=PorterStemmer()\n",
+ " words=[\"connect\",\"connected\",\"connection\",\"connections\",\"connects\"]\n",
+ " stemmed_words=[porter_stemmer.stem(word) for word in words]\n",
+ " stemmed_words\"\"\"\n",
+ " \n",
+ "for tok in tokenize(BytesIO(code.encode('utf-8')).readline):\n",
+ "    print(f\"Type: {tok.type}\\nString: {tok.string}\\nStart: {tok.start}\\nEnd: {tok.end}\\nLine: {tok.line.strip()}\\n======\\n\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Create a function to summarize the data.', 'For each column in the dataframe, create a correlation matrix.', '3']\n"
+ ]
+ }
+ ],
+ "source": [
+ "import re\n",
+ "my_summary = '\\n1. Create a function to summarize the code.\\n2. At first, we will start by importing the pandas and numpy modules.'.strip()\n",
+ "my_summary = 'Create a function summarize and load the dataset.\\n1. To Load the dataset\\n2. To display the basic information\\n3.'.strip()\n",
+ "my_summary = '\\n1. Create a function to summarize the data.\\n2. For each column in the dataframe, create a correlation matrix.\\n3'\n",
+ "my_symmary = \"\\n1. Create a function to summarize the code.\\n2. At first, we will start by importing the pandas and numpy modules.\"\n",
+ "sentences = my_summary.split('\\n')[1:]\n",
+ "#remove the trailing list enumeration\n",
+ "new_sentences = []\n",
+ "for sentence in sentences:\n",
+ "    new_sentences.append(re.sub(\"[0-9]+\\.\\s\", \"\", sentence))\n",
+ "print(new_sentences)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "1. Create a function to summarize the data.\n",
+ "2.\n",
+ "the sentence is valid? True\n",
+ "\n",
+ " False SPACE\n",
+ "1 False X\n",
+ ". False PUNCT\n",
+ "Create True VERB\n",
+ "a True DET\n",
+ "function True NOUN\n",
+ "to True PART\n",
+ "summarize True VERB\n",
+ "the True DET\n",
+ "data True NOUN\n",
+ ". False PUNCT\n",
+ "\n",
+ " False SPACE\n",
+ "2 False X\n",
+ ". False PUNCT\n",
+ "For each column in the dataframe, create a correlation matrix.\n",
+ "\n",
+ "the sentence is valid? True\n",
+ "For True ADP\n",
+ "each True DET\n",
+ "column True NOUN\n",
+ "in True ADP\n",
+ "the True DET\n",
+ "dataframe True NOUN\n",
+ ", False PUNCT\n",
+ "create True VERB\n",
+ "a True DET\n",
+ "correlation True NOUN\n",
+ "matrix True NOUN\n",
+ ". False PUNCT\n",
+ "\n",
+ " False SPACE\n",
+ "3\n",
+ "the sentence is valid? False\n",
+ "3 False NUM\n"
+ ]
+ }
+ ],
+ "source": [
+ "import spacy\n",
+ "nlp = spacy.load(\"en_core_web_sm\")\n",
+ "\n",
+ "\n",
+ "def is_valid(words: list[str]):\n",
+ "    has_noun = False\n",
+ "    has_verb = False\n",
+ "    for word in words: \n",
+ "        if word.pos_ in ['NOUN', 'PROPN', 'PRON']:\n",
+ "            has_noun = True\n",
+ "        if word.pos_ == 'VERB':\n",
+ "            has_verb = True\n",
+ "    return has_noun and has_verb\n",
+ "\n",
+ "doc = nlp(my_summary)\n",
+ "sentences = list(doc.sents)\n",
+ "\n",
+ "for sentence in sentences:\n",
+ "    print(sentence)\n",
+ "    print(\"the sentence is valid?\", is_valid(sentence))\n",
+ "    for word in sentence:\n",
+ "        print(word, word.is_alpha, word.pos_)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
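The two experiments above (regex enumeration stripping and the POS-based validity check) can be folded into one helper. A sketch; clean_summary is a hypothetical name, not anything defined in this commit:

import re
import spacy

nlp = spacy.load("en_core_web_sm")

def clean_summary(summary: str) -> str:
    # Hypothetical helper: drop leading list markers such as "1. ", then keep
    # only sentences containing both a noun-like token (NOUN/PROPN/PRON) and a VERB.
    text = re.sub(r"[0-9]+\.\s", "", summary)
    kept = [
        s.text.strip()
        for s in nlp(text).sents
        if any(t.pos_ in ("NOUN", "PROPN", "PRON") for t in s)
        and any(t.pos_ == "VERB" for t in s)
    ]
    return " ".join(kept)

print(clean_summary("\n1. Create a function to summarize the data.\n2. For each column in the dataframe, create a correlation matrix.\n3"))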
test.json ADDED
File without changes