Spaces:
Sleeping
Sleeping
Update pages/Lifecycle of Machine Learning.py
Browse files
pages/Lifecycle of Machine Learning.py
CHANGED
@@ -332,5 +332,132 @@ y, sr = librosa.load(audio_path, sr=None) # sr=None to preserve the original sa
|
|
332 |
"""
|
333 |
)
|
334 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
335 |
|
336 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
332 |
"""
|
333 |
)
|
334 |
|
335 |
+
elif data_type == "Semi-Structured":
|
336 |
+
|
337 |
+
st.subheader("Semi-structured Data")
|
338 |
+
|
339 |
+
st.write(
|
340 |
+
"Semi-structured data is data that doesn’t fit into a rigid structure like relational databases but has some organizational properties, such as tags or key-value pairs, making it easier to analyze.")
|
341 |
+
|
342 |
+
st.write("Data Formats:")
|
343 |
+
format_selected = st.radio(
|
344 |
+
"Select a format to explore further:",
|
345 |
+
("JSON","XML")
|
346 |
+
)
|
347 |
+
|
348 |
+
# How to read each semi-structured format (JSON / XML)
|
349 |
+
if format_selected == "JSON":
|
350 |
+
|
351 |
+
st.subheader("JSON Data Format")
|
352 |
+
st.write("*What is it?*")
|
353 |
+
st.write(
|
354 |
+
"JSON is a lightweight data-interchange format that uses key-value pairs. It is commonly used in web services and APIs for exchanging data. "
|
355 |
+
)
|
356 |
+
|
357 |
+
|
358 |
+
st.write("*How to Read JSON Files?*")
|
359 |
+
st.code(
|
360 |
+
|
361 |
+
"""
|
362 |
+
|
363 |
+
import json
|
364 |
+
|
365 |
+
# Open and read the JSON file
|
366 |
+
with open('data.json', 'r') as file:
|
367 |
+
data = json.load(file)
|
368 |
+
""",
|
369 |
+
language="python"
|
370 |
+
|
371 |
+
|
372 |
+
)
|
373 |
+
|
374 |
+
|
375 |
+
|
376 |
+
|
377 |
+
|
378 |
+
|
379 |
+
|
380 |
+
|
381 |
+
|
382 |
+
st.write("*Common Issues When Handling JSON Files*")
|
383 |
+
|
384 |
+
st.write(
|
385 |
+
"""
|
386 |
+
- File Encoding Issues
|
387 |
+
- Invalid JSON Syntax
|
388 |
+
- Large JSON Files Causing Memory Issues
|
389 |
+
- Inconsistent Data Structure
|
390 |
+
"""
|
391 |
+
)
|
392 |
+
|
393 |
+
|
394 |
+
|
395 |
+
st.write("*How to Overcome These Errors/Issues?*")
|
396 |
+
st.write(
|
397 |
+
"""
|
398 |
+
- Validate JSON Syntax: Use a validator such as JSONLint, or catch json.JSONDecodeError in Python, to detect invalid JSON before processing it.
|
399 |
+
|
400 |
+
- Handle Encoding: Specify the encoding when opening the file in Python (e.g., open('file.json', 'r', encoding='utf-8')).
|
401 |
|
402 |
+
- Use Chunking or Streaming for Large Files: For large JSON files, load the file in chunks or use libraries that support JSON streaming like ijson or jsonlines.
|
403 |
+
|
404 |
+
- Consistent Structure: Ensure consistent data structure when creating JSON files, or write code to handle missing or extra fields gracefully.
|
405 |
+
"""
|
406 |
+
)
|
407 |
+
|
408 |
+
elif format_selected == "XML":
|
409 |
+
|
410 |
+
st.subheader("XML Data Format")
|
411 |
+
st.write("*What is it?*")
|
412 |
+
st.write(
|
413 |
+
"XML is a flexible, structured data format used to store and transport data, utilizing tags to define elements, attributes, and hierarchical relationships between different pieces of information. "
|
414 |
+
)
|
415 |
+
|
416 |
+
|
417 |
+
st.write("*How to Read XML Files?*")
|
418 |
+
st.code(
|
419 |
+
|
420 |
+
"""
|
421 |
+
|
422 |
+
import pandas as pd
|
423 |
+
pd.read_xml("data.xml")
|
424 |
+
""",
|
425 |
+
language="python"
|
426 |
+
|
427 |
+
|
428 |
+
)
|
429 |
+
|
430 |
+
|
431 |
+
|
432 |
+
|
433 |
+
|
434 |
+
|
435 |
+
|
436 |
+
|
437 |
+
|
438 |
+
st.write("*Common Issues When Handling XML Files*")
|
439 |
+
|
440 |
+
st.write(
|
441 |
+
"""
|
442 |
+
- Invalid XML Syntax.
|
443 |
+
- Encoding Issues.
|
444 |
+
- Large XML Files.
|
445 |
+
- Inconsistent Structure.
|
446 |
+
"""
|
447 |
+
)
|
448 |
+
|
449 |
+
|
450 |
+
|
451 |
+
st.write("*How to Overcome These Errors/Issues?*")
|
452 |
+
st.write(
|
453 |
+
"""
|
454 |
+
- Validate XML Syntax: Use XML validators and try-except blocks to catch and fix syntax errors during parsing.
|
455 |
+
- Handle Encoding Issues: Specify the encoding when reading files and use libraries like chardet to detect encoding automatically.
|
456 |
+
- Process Large Files Efficiently: Use streaming parsers (e.g., iterparse()) and iterative parsing to handle large files without consuming too much memory.
|
457 |
+
- Ensure Consistent Structure: Check for missing elements before accessing them and handle inconsistencies with default values or conditional logic.
|
458 |
+
|
459 |
+
|
460 |
+
|
461 |
+
"""
|
462 |
+
)
|
463 |
+
|