Spaces:

ankithpatel
/

zero_to_hero_ML

Sleeping

App Files Files Community

ankithpatel committed on Dec 14, 2024

Commit

35ea08a

verified ·

1 Parent(s): f8e79e0

Update pages/Lifecycle of Machine Learning.py

Browse files

Files changed (1) hide show

pages/Lifecycle of Machine Learning.py +128 -1

pages/Lifecycle of Machine Learning.py CHANGED Viewed

@@ -332,5 +332,132 @@ y, sr = librosa.load(audio_path, sr=None)  # sr=None to preserve the original sa
                 """
             )

                 """
             )
+    elif data_type == "Semi-Structured":
+        st.subheader("Semi-structured Data")
+        st.write(
+            "Semi-structured data is data that doesn’t fit into a rigid structure like relational databases but has some organizational properties, such as tags or key-value pairs, making it easier to analyze."
+        st.write("Data Formats:")
+        format_selected = st.radio(
+            "Select a format to explore further:",
+            ("JSON","XML")
+        )
+        #HOW TO READ TEXT
+        if format_selected == "JSON":
+            st.subheader("JSON Data Format")
+            st.write("*What is it?*")
+            st.write(
+                "JSON is a lightweight data-interchange format that uses key-value pairs. It is commonly used in web services and APIs for exchanging data. "
+                )
+            st.write("*How to Read JSON Files?*")
+            st.code(
+                """
+import json
+# Open and read the JSON file
+with open('data.json', 'r') as file:
+    data = json.load(file)
+                """,
+                 language="python"
+            )
+            st.write("*Common Issues When Handling json Files*")
+            st.write(
+                """
+- File Encoding Issues
+-  Invalid JSON Syntax
+- Large JSON Files Causing Memory Issues
+- Inconsistent Data Structure
+                """
+            )
+            st.write("*How to Overcome These Errors/Issues?*")
+            st.write(
+                """
+- Validate JSON Syntax: Use tools like JSONLint or json.decoder.JSONDecodeError in Python to ensure valid JSON format.
+- Handle Encoding: Specify the encoding when opening the file in Python (e.g., open('file.json', 'r', encoding='utf-8')).
+- Use Chunking or Streaming for Large Files: For large JSON files, load the file in chunks or use libraries that support JSON streaming like ijson or jsonlines.
+- Consistent Structure: Ensure consistent data structure when creating JSON files, or write code to handle missing or extra fields gracefully.
+                """
+            )
+        elif format_selected == "XML":
+            st.subheader("XML Data Format")
+            st.write("*What is it?*")
+            st.write(
+                "XML is a flexible, structured data format used to store and transport data, utilizing tags to define elements, attributes, and hierarchical relationships between different pieces of information. "
+                )
+            st.write("*How to Read XML Files?*")
+            st.code(
+                """
+import pandas as pd
+pd.read_xml(r"C:\Users\ankit\OneDrive\Documents\xml data 3.txt")
+                """,
+                 language="python"
+            )
+            st.write("*Common Issues When Handling XML Files*")
+            st.write(
+                """
+-  Invalid XML Syntax.
+- Encoding Issues.
+-  Large XML Files.
+- Inconsistent Structure.
+                """
+            )
+            st.write("*How to Overcome These Errors/Issues?*")
+            st.write(
+                """
+-  Validate XML Syntax: Use XML validators and try-except blocks to catch and fix syntax errors during parsing.
+- Handle Encoding Issues: Specify the encoding when reading files and use libraries like chardet to detect encoding automatically.
+- Process Large Files Efficiently: Use streaming parsers (e.g., iterparse()) and iterative parsing to handle large files without consuming too much memory.
+- Ensure Consistent Structure: Check for missing elements before accessing them and handle inconsistencies with default values or conditional logic.
+                """
+            )