Spaces:
Sleeping
Sleeping
from fastapi import FastAPI, HTTPException,Query | |
from supabase import create_client, Client | |
from typing import List, Dict | |
from statistics import mean | |
from collections import Counter | |
import os | |
from datetime import datetime | |
import json | |
app = FastAPI() | |
# Initialize Supabase client | |
url: str = os.getenv('SUPABASE_URL') | |
key: str = os.getenv('SUPABASE_KEY') | |
supabase: Client = create_client(url, key) | |
async def get_user_travel_data(hushh_id:str): | |
resp = supabase.table("receipt_radar_structured_data_duplicate").select("metadata,message_id,logo,company").eq("brand_category","Travel").eq('user_id',hushh_id).execute() | |
print(resp.data) | |
data = {} | |
arrival_location_frequency = {} | |
for msg in resp.data: | |
print(msg) | |
date = msg.get('Date') | |
if msg.get('metadata') is not None and msg.get('metadata') != 'null': | |
ds = json.loads(msg.get('metadata')) | |
arrival_date = ds.get('arrival_date') or date | |
travel_type = ds.get('travel_type',None) | |
if ds.get('travel_type') == 'null' or travel_type is None: | |
continue | |
departure_destination = ds.get('departure_destination') if ds.get('departure_destination') is not None else None | |
arrival_destination = ds.get('arrival_destination') if ds.get('arrival_destination') is not None else None | |
arrival_city = ds.get('arrival_city') if ds.get('arrival_city') is not None else None | |
if 'travel_history' not in data: | |
data['travel_history'] = [] | |
data['travel_history'].append({"message_id":msg.get('message_id'),"domain":msg.get('company'),"logo":msg.get('logo'),'arrival_date':arrival_date,'travel_type':travel_type,'departure_destination':departure_destination,'arrival_destination':arrival_destination}) | |
print('Data') | |
print(data) | |
# Increment arrival_location_frequency count for the current arrival_destination | |
if arrival_city: | |
if arrival_city in arrival_location_frequency: | |
arrival_location_frequency[arrival_city] += 1 | |
else: | |
arrival_location_frequency[arrival_city] = 1 | |
sorted_arrival_location_frequency = dict( | |
sorted(arrival_location_frequency.items(), key=lambda item: item[1], reverse=True) | |
) | |
data['arrival_location_frequency'] = sorted_arrival_location_frequency | |
return data | |
async def get_travel_analytics(user_id: str = Query(..., description="User's hush ID")): | |
try: | |
# Fetch data from Supabase | |
response = supabase.table("receipt_radar_structured_data_duplicate_duplicate").select("metadata, total_cost, brand_category").eq('user_id',user_id).execute() | |
# Extract metadata from the response | |
metadata_list: List[Dict] = [json.loads(row['metadata']) for row in response.data if row['metadata']] | |
total_costs = [float(row['total_cost']) for row in response.data if row.get('total_cost')] | |
print(metadata_list) | |
print(total_costs) | |
# Initialize variables for analytics | |
total_trips = len(metadata_list) | |
trip_durations = [] | |
domestic_trips = 0 | |
international_trips = 0 | |
destination_types = Counter() | |
booking_lead_times = [] | |
accommodation_spending = [] | |
transport_spending = [] | |
activities_spending = [] | |
domestic_departure_countries = Counter() | |
international_departure_countries = Counter() | |
domestic_arrival_countries = Counter() | |
international_arrival_countries = Counter() | |
all_dates = [] | |
# Process each metadata entry | |
for metadata in metadata_list: | |
# Trip duration | |
if 'check_in_date' in metadata and 'check_out_date' in metadata: | |
print("inside 1st") | |
check_in_date = metadata['check_in_date'] | |
check_out_date = metadata['check_out_date'] | |
duration = (check_out_date - check_in_date).days | |
trip_durations.append(duration) | |
all_dates.append(check_in_date) | |
all_dates.append(check_out_date) | |
# Domestic vs International | |
if 'departure_country' in metadata and 'arrival_country' in metadata: | |
print("inside 2nd") | |
departure_country = metadata['departure_country'] | |
arrival_country = metadata['arrival_country'] | |
if departure_country == arrival_country: | |
print("inside 2nd") | |
domestic_trips += 1 | |
domestic_departure_countries[departure_country] += 1 | |
domestic_arrival_countries[arrival_country] += 1 | |
else: | |
international_trips += 1 | |
international_departure_countries[departure_country] += 1 | |
international_arrival_countries[arrival_country] += 1 | |
# Destination type (simplified) | |
if 'destination_type' in metadata: | |
print("inside 3rd") | |
destination_types[metadata['destination_type']] += 1 | |
# Booking lead time | |
if 'date_of_purchase' in metadata and 'departure_date' in metadata: | |
print("inside 4th") | |
lead_time = (datetime.strptime(metadata['departure_date'],"%d-%m-%Y") - datetime.strptime(metadata['date_of_purchase'],"%d-%m-%Y")).days | |
booking_lead_times.append(lead_time) | |
# Spending | |
if 'accommodation_cost' in metadata: | |
accommodation_spending.append(metadata['accommodation_cost']) | |
if 'transport_cost' in metadata: | |
transport_spending.append(metadata['transport_cost']) | |
if 'activities_cost' in metadata: | |
activities_spending.append(metadata['activities_cost']) | |
# Calculate the number of years covered by the data | |
if all_dates: | |
min_date = min(all_dates) | |
max_date = max(all_dates) | |
date_range_years = (max_date - min_date).days / 365.25 | |
else: | |
date_range_years = 1 # Default to 1 year if no dates are available | |
# Calculate analytics | |
analytics = { | |
"travel_frequency": { | |
"trips_per_year": total_trips / date_range_years, | |
"average_trip_duration": mean(trip_durations) if trip_durations else None, | |
"domestic_vs_international": f"{domestic_trips}:{international_trips}", | |
"domestic_departure_countries": dict(domestic_departure_countries), | |
"international_departure_countries": dict(international_departure_countries), | |
"domestic_arrival_countries": dict(domestic_arrival_countries), | |
"international_arrival_countries": dict(international_arrival_countries), | |
}, | |
# "destination_preferences": { | |
# "popular_types": dict(destination_types.most_common(5)) | |
# }, | |
"booking_patterns": { | |
"average_lead_time": mean(booking_lead_times) if booking_lead_times else None | |
}, | |
"travel_expenditure": { | |
"average_accommodation_cost": mean(accommodation_spending) if accommodation_spending else None, | |
"average_transport_cost": mean(transport_spending) if transport_spending else None, | |
"average_activities_cost": mean(activities_spending) if activities_spending else None | |
} | |
} | |
return analytics | |
except Exception as e: | |
raise HTTPException(status_code=500, detail=str(e)) | |