Spaces:
Sleeping
Sleeping
Update appStore/prep_data.py
Browse files- appStore/prep_data.py +22 -22
appStore/prep_data.py
CHANGED
@@ -4,6 +4,27 @@ from appStore.prep_utils import create_chunks
|
|
4 |
from appStore.search import hybrid_search
|
5 |
from datetime import datetime
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
def process_iati():
|
9 |
"""
|
@@ -58,28 +79,7 @@ def process_giz_worldwide():
|
|
58 |
'duration.project.end': 'end_year'
|
59 |
})
|
60 |
|
61 |
-
|
62 |
-
try:
|
63 |
-
# If val is a string, first check if it represents a numeric value.
|
64 |
-
if isinstance(val, str):
|
65 |
-
val_str = val.strip()
|
66 |
-
try:
|
67 |
-
# Try converting the string to a float (i.e. it’s an epoch in string form)
|
68 |
-
num = float(val_str)
|
69 |
-
return datetime.utcfromtimestamp(num / 1000).strftime("%Y-%m-%d")
|
70 |
-
except ValueError:
|
71 |
-
# Not a numeric string; assume it's already a date string in "YYYY-MM-DD" format.
|
72 |
-
# Optionally, you can validate it:
|
73 |
-
datetime.strptime(val_str, "%Y-%m-%d")
|
74 |
-
return val_str
|
75 |
-
elif isinstance(val, (int, float)):
|
76 |
-
return datetime.utcfromtimestamp(val / 1000).strftime("%Y-%m-%d")
|
77 |
-
else:
|
78 |
-
return "Unknown"
|
79 |
-
except Exception:
|
80 |
-
return "Unknown"
|
81 |
-
|
82 |
-
|
83 |
giz_df['end_year'] = giz_df['end_year'].apply(convert_to_date)
|
84 |
|
85 |
# Create an empty 'url' column as the new dataset has an empty URL
|
|
|
4 |
from appStore.search import hybrid_search
|
5 |
from datetime import datetime
|
6 |
|
7 |
+
def convert_to_date(val):
    """Normalize an epoch-milliseconds value or a date string to "YYYY-MM-DD".

    Args:
        val: One of
            * an ``int``/``float`` interpreted as milliseconds since
              1970-01-01 UTC,
            * a ``str`` holding such a number (e.g. ``"1577836800000"``), or
            * a ``str`` already in ``"%Y-%m-%d"`` form, which is validated and
              returned stripped of surrounding whitespace.

    Returns:
        The date as a ``"YYYY-MM-DD"`` string, or ``"Unknown"`` when ``val`` is
        of another type, is not a parseable date, or is out of the range that
        ``datetime`` can represent (including NaN and infinity).
    """
    # Function-scope import: only `datetime` is imported at file level.
    from datetime import timedelta

    try:
        if isinstance(val, str):
            text = val.strip()
            try:
                # A numeric string is treated as an epoch in milliseconds.
                millis = float(text)
            except ValueError:
                # Not numeric: accept it only if it is a valid "%Y-%m-%d" date.
                datetime.strptime(text, "%Y-%m-%d")
                return text
        elif isinstance(val, (int, float)):
            millis = val
        else:
            return "Unknown"

        # Epoch arithmetic replaces the deprecated utcfromtimestamp(), whose
        # accepted range depends on the platform C library (e.g. negative
        # timestamps can fail), so results are the same everywhere.
        moment = datetime(1970, 1, 1) + timedelta(milliseconds=millis)
        return moment.strftime("%Y-%m-%d")
    except (ValueError, OverflowError):
        # ValueError: malformed date string or NaN; OverflowError: infinity or
        # a value outside datetime's representable range.
        return "Unknown"
|
27 |
+
|
28 |
|
29 |
def process_iati():
|
30 |
"""
|
|
|
79 |
'duration.project.end': 'end_year'
|
80 |
})
|
81 |
|
82 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
giz_df['end_year'] = giz_df['end_year'].apply(convert_to_date)
|
84 |
|
85 |
# Create an empty 'url' column as the new dataset has an empty URL
|