Update pages/6.Data Collection.py
Browse files- pages/6.Data Collection.py +53 -0
pages/6.Data Collection.py
CHANGED
@@ -150,3 +150,56 @@ if format_selected == "CSV":
|
|
150 |
file_name="CSV_guide.ipynb",
|
151 |
mime="application/octet-stream",
|
152 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
file_name="CSV_guide.ipynb",
|
151 |
mime="application/octet-stream",
|
152 |
)
|
153 |
+
|
154 |
+
# Main Section
# Renders the static "XML Data Format Guide": a title followed by four
# lettered sub-sections (a-d). The st.code() snippets are only displayed to
# the reader; this page never executes them.
st.title("XML Data Format Guide")

# XML Explanation Sections
st.write("#### a. What is XML?")
st.write("""
XML (eXtensible Markup Language) is a markup language designed to store and transport data.
It uses a hierarchical structure and tags, making it both human-readable and machine-readable.
""")

st.write("#### b. How to Read XML Files")
# Loop bodies inside the sample are indented so the snippet is valid Python
# when a reader copies it.
st.code("""
import xml.etree.ElementTree as ET

# Parse an XML file
tree = ET.parse("file.xml")
root = tree.getroot()

# Access elements
for child in root:
    print(child.tag, child.text)
""", language="python")

st.write("#### c. Issues Encountered When Handling XML Files")
st.write("""
1. **Complex Structures:** XML files may have deeply nested hierarchies.
2. **Large File Sizes:** Memory-intensive parsing for large files.
3. **Data Inconsistency:** Missing or unexpected tags may cause parsing errors.
4. **Encoding Issues:** Files with non-standard encodings can fail to parse.
""")

st.write("#### d. How to Overcome These Issues")
# element.clear() alone leaves the emptied elements attached to their parent,
# so the sample also detaches already-processed siblings; without that step
# memory use still grows with the size of the file.
st.code("""
from lxml import etree

# Handle large XML files using event-driven parsing
for event, element in etree.iterparse("large_file.xml", events=("end",)):
    print(element.tag, element.text)

    # Release the content of the element that was just processed
    element.clear()

    # Detach earlier siblings that are still referenced by the parent,
    # otherwise the emptied elements keep accumulating in memory
    parent = element.getparent()
    if parent is not None:
        while element.getprevious() is not None:
            del parent[0]
""", language="python")
|
194 |
+
|
195 |
+
# Downloadable Guide Button
st.markdown("### Download Coding Guide")

# The download button is rendered directly instead of inside
# `if st.button(...)`: st.button is True only for the single rerun that follows
# its click, so a nested download button needs two clicks and disappears again
# on the rerun triggered by the second one.
file_path = "XML_guide.ipynb"  # Replace with the actual file path

try:
    # Read the notebook up front so the file handle is closed before the
    # widget is created.
    with open(file_path, "rb") as file:
        guide_bytes = file.read()
except OSError:
    # A missing or unreadable notebook should not take the whole page down.
    st.warning(f"The XML guide ({file_path}) is not available for download right now.")
else:
    st.download_button(
        label="Download XML Guide",
        data=guide_bytes,
        file_name="XML_guide.ipynb",
        mime="application/octet-stream",
    )
|