File size: 2,110 Bytes
67f6dad |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import pandas as pd
import os
import random
from faker import Faker
def generate_advanced_data(n=1000, *, seed=None):
    """Generate synthetic patient records for hospital readmission prediction.

    Every field is drawn uniformly at random. The ``Readmitted_30_Days``
    label is derived from a simple additive risk score (see inline comments),
    so it correlates with the chronic-condition count, ER visits, medication
    adherence and previous readmissions, but not with the other columns.

    Args:
        n: Number of patient rows to generate. Values <= 0 yield an empty
            frame that still carries the full column schema.
        seed: Optional seed for reproducible output. When ``None`` (the
            default) values are drawn from the module-level ``random``
            generator, so a global ``random.seed(...)`` made by the caller
            keeps working as before.

    Returns:
        A ``pandas.DataFrame`` with one row per synthetic patient.
    """
    # Explicit schema so that an empty result still exposes every column.
    columns = (
        "Patient_ID",
        "Age",
        "Gender",
        "Chronic_Conditions",
        "Primary_Diagnosis",
        "Num_ER_Visits",
        "Last_Discharge_Days_Ago",
        "Previous_Readmissions",
        "FollowUp_Scheduled",
        "Medication_Adherence",
        "Language_Barrier",
        "Housing_Instability",
        "Caregiver_Support",
        "Readmitted_30_Days",
    )
    diagnoses = ['Diabetes', 'CHF', 'COPD', 'CKD', 'Depression']
    yes_no = ["Yes", "No"]

    # A private generator keeps seeded runs isolated from global state; the
    # ``random`` module itself exposes the same randint/choice/random API.
    rng = random if seed is None else random.Random(seed)

    records = []
    for i in range(n):
        chronic = rng.randint(0, 5)
        er_visits = rng.randint(0, 4)
        adherence = rng.choice(['Low', 'Medium', 'High'])
        prev_readmits = rng.randint(0, 3)
        age = rng.randint(45, 90)

        # Additive risk: chronic conditions are weighted 1.2x, anything but
        # 'High' adherence adds one point, and a coin flip adds 0/1 of noise
        # so the label is not a deterministic function of the features.
        risk_score = (
            chronic * 1.2
            + er_visits
            + (0 if adherence == 'High' else 1)
            + prev_readmits
            + (1 if rng.random() > 0.5 else 0)
        )
        readmitted = 1 if risk_score > 5 else 0

        records.append({
            "Patient_ID": f"P{i+1:04}",  # 1-based, zero-padded to 4 digits
            "Age": age,
            "Gender": rng.choice(["M", "F"]),
            "Chronic_Conditions": chronic,
            "Primary_Diagnosis": rng.choice(diagnoses),
            "Num_ER_Visits": er_visits,
            "Last_Discharge_Days_Ago": rng.randint(1, 60),
            "Previous_Readmissions": prev_readmits,
            "FollowUp_Scheduled": rng.choice(yes_no),
            "Medication_Adherence": adherence,
            "Language_Barrier": rng.choice(yes_no),
            "Housing_Instability": rng.choice(yes_no),
            "Caregiver_Support": rng.choice(yes_no),
            "Readmitted_30_Days": readmitted,
        })

    return pd.DataFrame(records, columns=list(columns))
def load_or_generate_data(path="data/patients.csv", n=1000):
    """Load patient data from ``path``, generating and caching it if absent.

    Args:
        path: CSV file to read. If it does not exist, synthetic data is
            generated, written to this location, and returned.
        n: Number of rows to generate when the file is missing. Ignored when
            the file already exists.

    Returns:
        A ``pandas.DataFrame`` with the loaded or freshly generated records.
    """
    if os.path.exists(path):
        return pd.read_csv(path)

    df = generate_advanced_data(n=n)

    # A bare filename such as "patients.csv" has an empty dirname, and
    # os.makedirs("") raises FileNotFoundError, so only create the parent
    # directory when there actually is one.
    parent_dir = os.path.dirname(path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    df.to_csv(path, index=False)
    return df
|