ankithpatel committed on
Commit
35ea08a
·
verified ·
1 Parent(s): f8e79e0

Update pages/Lifecycle of Machine Learning.py

Browse files
pages/Lifecycle of Machine Learning.py CHANGED
@@ -332,5 +332,132 @@ y, sr = librosa.load(audio_path, sr=None) # sr=None to preserve the original sa
332
  """
333
  )
334
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
 
336
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
  """
333
  )
334
 
335
+ elif data_type == "Semi-Structured":
336
+
337
+ st.subheader("Semi-structured Data")
338
+
339
+ st.write(
340
+ "Semi-structured data is data that doesn’t fit into a rigid structure like relational databases but has some organizational properties, such as tags or key-value pairs, making it easier to analyze."
341
+
342
+ st.write("Data Formats:")
343
+ format_selected = st.radio(
344
+ "Select a format to explore further:",
345
+ ("JSON","XML")
346
+ )
347
+
348
+ # Show details for the selected format (JSON or XML)
349
+ if format_selected == "JSON":
350
+
351
+ st.subheader("JSON Data Format")
352
+ st.write("*What is it?*")
353
+ st.write(
354
+ "JSON is a lightweight data-interchange format that uses key-value pairs. It is commonly used in web services and APIs for exchanging data. "
355
+ )
356
+
357
+
358
+ st.write("*How to Read JSON Files?*")
359
+ st.code(
360
+
361
+ """
362
+
363
+ import json
364
+
365
+ # Open and read the JSON file
366
+ with open('data.json', 'r') as file:
367
+ data = json.load(file)
368
+ """,
369
+ language="python"
370
+
371
+
372
+ )
373
+
374
+
375
+
376
+
377
+
378
+
379
+
380
+
381
+
382
+ st.write("*Common Issues When Handling json Files*")
383
+
384
+ st.write(
385
+ """
386
+ - File Encoding Issues
387
+ - Invalid JSON Syntax
388
+ - Large JSON Files Causing Memory Issues
389
+ - Inconsistent Data Structure
390
+ """
391
+ )
392
+
393
+
394
+
395
+ st.write("*How to Overcome These Errors/Issues?*")
396
+ st.write(
397
+ """
398
+ - Validate JSON Syntax: Use tools like JSONLint or json.decoder.JSONDecodeError in Python to ensure valid JSON format.
399
+
400
+ - Handle Encoding: Specify the encoding when opening the file in Python (e.g., open('file.json', 'r', encoding='utf-8')).
401
 
402
+ - Use Chunking or Streaming for Large Files: For large JSON files, load the file in chunks or use libraries that support JSON streaming like ijson or jsonlines.
403
+
404
+ - Consistent Structure: Ensure consistent data structure when creating JSON files, or write code to handle missing or extra fields gracefully.
405
+ """
406
+ )
407
+
408
+ elif format_selected == "XML":
409
+
410
+ st.subheader("XML Data Format")
411
+ st.write("*What is it?*")
412
+ st.write(
413
+ "XML is a flexible, structured data format used to store and transport data, utilizing tags to define elements, attributes, and hierarchical relationships between different pieces of information. "
414
+ )
415
+
416
+
417
+ st.write("*How to Read XML Files?*")
418
+ st.code(
419
+
420
+ """
421
+
422
+ import pandas as pd
423
+ pd.read_xml(r"C:\Users\ankit\OneDrive\Documents\xml data 3.txt")
424
+ """,
425
+ language="python"
426
+
427
+
428
+ )
429
+
430
+
431
+
432
+
433
+
434
+
435
+
436
+
437
+
438
+ st.write("*Common Issues When Handling XML Files*")
439
+
440
+ st.write(
441
+ """
442
+ - Invalid XML Syntax.
443
+ - Encoding Issues.
444
+ - Large XML Files.
445
+ - Inconsistent Structure.
446
+ """
447
+ )
448
+
449
+
450
+
451
+ st.write("*How to Overcome These Errors/Issues?*")
452
+ st.write(
453
+ """
454
+ - Validate XML Syntax: Use XML validators and try-except blocks to catch and fix syntax errors during parsing.
455
+ - Handle Encoding Issues: Specify the encoding when reading files and use libraries like chardet to detect encoding automatically.
456
+ - Process Large Files Efficiently: Use streaming parsers (e.g., iterparse()) and iterative parsing to handle large files without consuming too much memory.
457
+ - Ensure Consistent Structure: Check for missing elements before accessing them and handle inconsistencies with default values or conditional logic.
458
+
459
+
460
+
461
+ """
462
+ )
463
+