# Install TA-Lib (see instructions above) then: pip install TA-Lib
import ccxt
import numpy as np
import pandas as pd
import time
from sklearn.neighbors import KNeighborsClassifier
from scipy.linalg import svd
import gradio as gr
import concurrent.futures
import traceback
from datetime import datetime, timezone, timedelta
import logging
import sys
import talib  # Import TA-Lib
import threading

# --- Setup Logging ---
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - [%(threadName)s:%(funcName)s] - %(message)s',
    stream=sys.stdout
)
logging.getLogger().handlers[0].flush = sys.stdout.flush

# --- Parameters ---
L = 10
LAG = 11
MINUTES_PER_HOUR = 60
PREDICTION_WINDOW_HOURS = 2
TRAINING_WINDOW_HOURS = 12
TOTAL_WINDOW_HOURS = TRAINING_WINDOW_HOURS + PREDICTION_WINDOW_HOURS
K = TRAINING_WINDOW_HOURS * MINUTES_PER_HOUR      # 720
WINDOW = TOTAL_WINDOW_HOURS * MINUTES_PER_HOUR    # 840
FEATURES = ['open', 'high', 'low', 'close', 'volume']
D = 5
OVERLAP_STEP = 60
MIN_TRAINING_EXAMPLES = 20
MAX_COINS_TO_DISPLAY = 10
USE_SYNTHETIC_DATA_FOR_LOW_VOLUME = False
NUM_WORKERS_TRAINING = 4
NUM_WORKERS_PREDICTION = 10

# --- TA & Risk Parameters ---
TA_DATA_POINTS = 200          # Candles needed for TA calculation
RSI_PERIOD = 14
MACD_FAST = 12
MACD_SLOW = 26
MACD_SIGNAL = 9
ATR_PERIOD = 14
CONFIDENCE_THRESHOLD = 0.65   # Min confidence for Rise signal
TP1_ATR_MULTIPLIER = 1.5
TP2_ATR_MULTIPLIER = 3.0
SL_ATR_MULTIPLIER = 1.0

# --- CCXT Initialization ---
try:
    exchange = ccxt.bitget({
        'enableRateLimit': True,
        'rateLimit': 1100,
        'timeout': 45000,
        'options': {'adjustForTimeDifference': True}
    })
    logging.info(f"Initialized {exchange.id} exchange.")
except Exception as e:
    logging.exception("FATAL: Could not initialize CCXT exchange.")
    sys.exit()

# --- Global Caches and Variables ---
markets_cache = None
last_markets_update = None
data_cache = {}
trained_models = {}
last_update_time = datetime.now(timezone.utc)

# --- Functions ---
def format_datetime(dt, default="N/A"):
    # (Keep this function as is)
    if pd.isna(dt) or dt is None:
        return default
    try:
        if isinstance(dt, (datetime, pd.Timestamp)):
            if dt.tzinfo is None:
                dt = dt.replace(tzinfo=timezone.utc)
            return dt.strftime('%Y-%m-%d %H:%M:%S %Z')
        else:
            return str(dt)
    except Exception:
        return default

def get_all_usdt_pairs():
    # (Keep this function as is - no changes needed)
    global markets_cache, last_markets_update
    current_time = time.time()
    cache_duration = 3600  # 1 hour
    if markets_cache is not None and last_markets_update is not None:
        if current_time - last_markets_update < cache_duration:
            logging.info("Using cached markets list.")
            if isinstance(markets_cache, list) and markets_cache:
                return markets_cache
            else:
                logging.warning("Cached market list was invalid, fetching fresh.")
    logging.info("Fetching markets from Bitget...")
    try:
        exchange.load_markets(reload=True)
        all_symbols = list(exchange.markets.keys())
        usdt_pairs = [
            symbol for symbol in all_symbols
            if isinstance(symbol, str)
            and symbol.endswith('/USDT')
            and exchange.markets.get(symbol, {}).get('active', False)
            and exchange.markets.get(symbol, {}).get('spot', False)
            and 'LEVERAGED' not in exchange.markets.get(symbol, {}).get('type', 'spot').upper()
            and not exchange.markets.get(symbol, {}).get('inverse', False)
        ]
        logging.info(f"Found {len(usdt_pairs)} active USDT spot pairs initially.")
        if not usdt_pairs:
            logging.warning("No active USDT spot pairs found.")
            return ['BTC/USDT', 'ETH/USDT', 'SOL/USDT']
        logging.info(f"Fetching tickers for {len(usdt_pairs)} pairs for volume sorting...")
        volumes = {}
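        # Note on the ranking step that follows (illustrative numbers, not live data):
        # each ticker's 24h quote volume is read from ticker['info']['quoteVolume'] when
        # present; otherwise it is approximated as baseVolume * last price, e.g. a pair
        # reporting baseVolume=1500 and last=60000 would be ranked with ~90,000,000 USDT.
        # Pairs are then sorted by this value and only the top MAX_COINS_TO_DISPLAY are kept.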
symbols_to_fetch = usdt_pairs[:] fetched_tickers = {} try: if exchange.has['fetchTickers']: batch_size_tickers = 100 for i in range(0, len(symbols_to_fetch), batch_size_tickers): batch_symbols = symbols_to_fetch[i:i+batch_size_tickers] logging.info(f"Fetching ticker batch {i//batch_size_tickers + 1}/{ (len(symbols_to_fetch) + batch_size_tickers -1)//batch_size_tickers }...") retries = 2 for attempt in range(retries): try: batch_tickers = exchange.fetch_tickers(symbols=batch_symbols) fetched_tickers.update(batch_tickers) time.sleep(exchange.rateLimit / 1000 * 1.5) # Add delay break except (ccxt.RequestTimeout, ccxt.NetworkError) as e_timeout: logging.warning(f"Ticker fetch timeout/network error on attempt {attempt+1}/{retries}: {e_timeout}, retrying after delay...") time.sleep(3 * (attempt + 1)) except ccxt.RateLimitExceeded: logging.warning(f"Rate limit exceeded fetching tickers, sleeping...") time.sleep(10 * (attempt+1)) # Longer sleep for rate limit except Exception as e_ticker: logging.error(f"Error fetching ticker batch (attempt {attempt+1}): {e_ticker}") if attempt == retries - 1: raise # Rethrow last error time.sleep(2 * (attempt + 1)) logging.info(f"Fetched {len(fetched_tickers)} tickers using fetchTickers.") else: raise ccxt.NotSupported("fetchTickers not supported/enabled. Volume sorting requires it.") except Exception as e: logging.exception(f"Could not fetch tickers for volume sorting: {e}. Volume sorting unavailable.") markets_cache = usdt_pairs[:MAX_COINS_TO_DISPLAY] last_markets_update = current_time logging.warning(f"Returning top {len(markets_cache)} unsorted pairs due to ticker error.") return markets_cache for symbol, ticker in fetched_tickers.items(): try: quote_volume = ticker.get('info', {}).get('quoteVolume') # Prefer quoteVolume if available last_price = ticker.get('last') base_volume = ticker.get('baseVolume') # Ensure values are convertible to float before calculation valid_last = last_price is not None valid_base = base_volume is not None valid_quote = quote_volume is not None if valid_quote: volumes[symbol] = float(quote_volume) elif valid_base and valid_last: volumes[symbol] = float(base_volume) * float(last_price) else: volumes[symbol] = 0 except (TypeError, ValueError, KeyError, AttributeError) as e: logging.warning(f"Could not parse volume/price for {symbol} from ticker: {ticker}. Error: {e}") volumes[symbol] = 0 valid_volume_pairs = {k: v for k, v in volumes.items() if v > 0} logging.info(f"Found {len(valid_volume_pairs)} pairs with non-zero volume.") if not valid_volume_pairs: logging.warning("No pairs with valid volume found. Returning default list.") return ['BTC/USDT', 'ETH/USDT', 'SOL/USDT'] sorted_pairs = sorted(valid_volume_pairs.items(), key=lambda item: item[1], reverse=True) num_pairs_to_take = min(MAX_COINS_TO_DISPLAY, len(sorted_pairs)) top_pairs = [pair[0] for pair in sorted_pairs[:num_pairs_to_take]] logging.info(f"Selected Top {len(top_pairs)} pairs by volume. 
Top 5: {[p[0] for p in sorted_pairs[:5]]}") markets_cache = top_pairs last_markets_update = current_time return top_pairs except ccxt.NetworkError as e: logging.error(f"Network error getting USDT pairs: {e}") except ccxt.ExchangeError as e: logging.error(f"Exchange error getting USDT pairs: {e}") except Exception as e: logging.exception("General error getting USDT pairs.") logging.warning("Error fetching markets, returning default fallback list.") return ['BTC/USDT', 'ETH/USDT', 'SOL/USDT', 'BNB/USDT', 'XRP/USDT'] def clean_and_process_ohlcv(ohlcv_list, symbol, expected_candles): # (Keep this function as is - no changes needed) if not ohlcv_list: return None try: df = pd.DataFrame(ohlcv_list, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume']) initial_len = len(df) if initial_len == 0: return None df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms', utc=True) df = df.drop_duplicates(subset=['timestamp']) df = df.sort_values('timestamp') len_after_dupes = len(df) numeric_cols = ['open', 'high', 'low', 'close', 'volume'] for col in numeric_cols: df[col] = pd.to_numeric(df[col], errors='coerce') # Drop rows with NaN in essential price/volume features needed for TA-Lib df = df.dropna(subset=numeric_cols) len_after_na = len(df) df.reset_index(drop=True, inplace=True) logging.debug(f"Data cleaning for {symbol}: Initial Fetched={initial_len}, AfterDupes={len_after_dupes}, AfterNA={len_after_na}") if len(df) >= expected_candles: final_df = df.iloc[-expected_candles:].copy() # Take the most recent ones return final_df else: return None except Exception as e: logging.exception(f"Error processing DataFrame for {symbol}") return None def fetch_historical_data(symbol, timeframe='1m', total_candles=WINDOW): # (Keep this function as is - no changes needed) cache_key = f"{symbol}_{timeframe}_{total_candles}" current_time = time.time() cache_validity_seconds = 300 # 5 minutes if cache_key in data_cache: cache_time, cached_data = data_cache[cache_key] if current_time - cache_time < cache_validity_seconds: if isinstance(cached_data, pd.DataFrame) and len(cached_data) == total_candles: logging.debug(f"Using valid cached data for {symbol} ({len(cached_data)} candles)") return cached_data.copy() else: logging.warning(f"Cache for {symbol} invalid or wrong size ({len(cached_data) if isinstance(cached_data, pd.DataFrame) else 'N/A'} vs {total_candles}), fetching fresh.") if cache_key in data_cache: del data_cache[cache_key] if not exchange.has['fetchOHLCV']: logging.error(f"Exchange {exchange.id} does not support fetchOHLCV.") return None logging.debug(f"Fetching {total_candles} candles for {symbol} (timeframe: {timeframe})") final_df = None fetch_start_time = time.time() duration_ms = exchange.parse_timeframe(timeframe) * 1000 now_ms = exchange.milliseconds() # --- Strategy 1: Try Single Large Fetch --- single_fetch_limit = total_candles + 200 # Buffer single_fetch_since = now_ms - single_fetch_limit * duration_ms try: ohlcv_list = exchange.fetch_ohlcv(symbol, timeframe, limit=single_fetch_limit, since=single_fetch_since) if ohlcv_list: processed_df = clean_and_process_ohlcv(ohlcv_list, symbol, total_candles) if processed_df is not None and len(processed_df) == total_candles: final_df = processed_df except ccxt.RateLimitExceeded as e: logging.warning(f"Rate limit hit during single fetch for {symbol}, falling back: {e}") time.sleep(5) except (ccxt.RequestTimeout, ccxt.NetworkError) as e: logging.warning(f"Timeout/Network error during single fetch for {symbol}, falling back: {e}") time.sleep(2) 
except ccxt.ExchangeNotAvailable as e: logging.error(f"Exchange not available during fetch for {symbol}: {e}") return None except ccxt.AuthenticationError as e: logging.error(f"Authentication error fetching {symbol}: {e}") return None except ccxt.ExchangeError as e: logging.warning(f"Exchange error during single fetch for {symbol}, falling back: {e}") except Exception as e: logging.exception(f"Unexpected error during single fetch for {symbol}, falling back.") # --- Strategy 2: Fallback to Iterative Chunking --- if final_df is None: logging.debug(f"Falling back to iterative chunk fetching for {symbol}.") limit_per_call = exchange.safe_integer(exchange.limits.get('fetchOHLCV', {}), 'max', 1000) limit_per_call = min(limit_per_call, 1000) all_ohlcv_chunks = [] required_start_time_ms = now_ms - (total_candles + 5) * duration_ms current_chunk_end_time_ms = now_ms max_chunk_attempts = 15 attempts = 0 while attempts < max_chunk_attempts: attempts += 1 oldest_ts_in_hand = all_ohlcv_chunks[0][0] if all_ohlcv_chunks else current_chunk_end_time_ms if oldest_ts_in_hand <= required_start_time_ms: logging.debug(f"Chunking: Collected enough historical range for {symbol}.") break fetch_limit = limit_per_call chunk_fetch_since = oldest_ts_in_hand - fetch_limit * duration_ms params = {} try: ohlcv_chunk = exchange.fetch_ohlcv(symbol, timeframe, since=chunk_fetch_since, limit=fetch_limit, params=params) if not ohlcv_chunk: logging.debug(f"Chunking: No more data received for {symbol} from API.") break new_chunk = [c for c in ohlcv_chunk if c[0] < oldest_ts_in_hand] if not new_chunk: break new_chunk.sort(key=lambda x: x[0]) all_ohlcv_chunks = new_chunk + all_ohlcv_chunks if len(new_chunk) < limit_per_call // 20 and attempts > 5: logging.warning(f"Chunking: Received very few new candles ({len(new_chunk)}) repeatedly for {symbol}.") break time.sleep(exchange.rateLimit / 1000 * 1.1) except ccxt.RateLimitExceeded as e: logging.warning(f"Rate limit hit during chunking for {symbol}, sleeping 10s: {e}") time.sleep(10 * (attempts/3 + 1)) except (ccxt.NetworkError, ccxt.RequestTimeout) as e: logging.error(f"Network/Timeout error during chunking for {symbol}: {e}. Stopping.") break except ccxt.ExchangeError as e: logging.error(f"Exchange error during chunking for {symbol}: {e}. Stopping.") break except Exception as e: logging.exception(f"Generic error during chunking fetch for {symbol}") break if attempts >= max_chunk_attempts: logging.warning(f"Max chunk fetch attempts reached for {symbol}.") if all_ohlcv_chunks: processed_df = clean_and_process_ohlcv(all_ohlcv_chunks, symbol, total_candles) if processed_df is not None and len(processed_df) == total_candles: final_df = processed_df else: logging.error(f"No data obtained from chunk fetching for {symbol}.") # --- Final Check and Caching --- if final_df is not None and len(final_df) == total_candles: expected_cols = ['timestamp', 'open', 'high', 'low', 'close', 'volume'] if all(col in final_df.columns for col in expected_cols): data_cache[cache_key] = (current_time, final_df.copy()) return final_df else: logging.error(f"Final DataFrame for {symbol} missing expected columns. Won't cache.") return None else: logging.error(f"Failed to fetch exactly {total_candles} candles for {symbol}. Found: {len(final_df) if final_df is not None else 0}") return None # --- Embedding, LLT, Normalize, Training Prep (Largely unchanged) --- # Keep create_embedding, llt_transform, normalize_data, prepare_training_data, train_model # as they don't depend on the TA library choice. 
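# How the embedding/LLT step below works (rough sketch of shapes using the configured
# parameters only -- no live data or extra behaviour is assumed):
#   create_embedding with L=10, LAG=11 turns a length-720 series (K minutes) into a matrix A
#   with 720 - (10 - 1) * 11 = 621 rows; row t holds samples t, t+11, ..., t+99.
#   llt_transform then computes S = A.T @ A (10x10) per feature, takes the right-singular
#   vector of S with the smallest singular value as that feature's "law", and groups the
#   laws by class label (0 = Fall, 1 = Rise). At transform time each window is projected
#   onto the class laws (S @ V) and the D=5 smallest column variances per (feature, class)
#   are kept, giving a len(FEATURES) * 2 * D = 50-dimensional vector for the KNN classifier.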
def create_embedding(data, l=L, lag=LAG): # (Keep this function as is) n = len(data) rows = n - (l - 1) * lag if rows <= 0: logging.debug(f"Cannot create embedding: data length {n} too short for L={l}, Lag={lag}") return np.array([]) A = np.zeros((rows, l)) try: for t in range(rows): indices = t + np.arange(l) * lag A[t] = data[indices] return A except IndexError as e: logging.error(f"IndexError during embedding: n={n}, l={l}, lag={lag}. Error: {e}") return np.array([]) except Exception as e: logging.exception("Error in create_embedding") return np.array([]) def llt_transform(X_train, y_train, X_test): # (Keep this function as is) if not isinstance(X_train, np.ndarray) or X_train.ndim != 3 or \ not isinstance(y_train, np.ndarray) or y_train.ndim != 1 or \ not isinstance(X_test, np.ndarray) or (X_test.size > 0 and X_test.ndim != 3): logging.error(f"LLT input type/shape error.") return np.array([]), np.array([]) if X_train.shape[0] != y_train.shape[0]: logging.error(f"LLT input mismatch: len(X_train) != len(y_train)") return np.array([]), np.array([]) if X_train.size == 0 or y_train.size == 0: logging.error("LLT requires non-empty training data.") return np.array([]), np.array([]) if X_test.size > 0 and X_test.shape[1:] != X_train.shape[1:]: logging.error(f"LLT train/test shape mismatch") return np.array([]), np.array([]) try: num_features = X_train.shape[2] if num_features != len(FEATURES): logging.error(f"LLT: Feature count mismatch.") return np.array([]), np.array([]) V = {j: {'0': [], '1': []} for j in range(num_features)} laws_computed_count = {j: {'0': 0, '1': 0} for j in range(num_features)} for i in range(len(X_train)): label = str(int(y_train[i])) if label not in ['0', '1']: continue for j in range(num_features): feature_data = X_train[i, :, j] A = create_embedding(feature_data, l=L, lag=LAG) if A.shape[0] < L: continue if np.isnan(A).any() or np.isinf(A).any(): continue try: S = A.T @ A if np.isnan(S).any() or np.isinf(S).any(): continue U, s, Vt = svd(S, full_matrices=False) if Vt.shape[0] < L or Vt.shape[1] != L: continue if s[-1] < 1e-9: continue v = Vt[-1] norm = np.linalg.norm(v) if norm < 1e-9: continue V[j][label].append(v / norm) laws_computed_count[j][label] += 1 except np.linalg.LinAlgError: pass except Exception: pass valid_laws_exist = False for j in V: for c in ['0', '1']: if laws_computed_count[j][c] > 0: valid_vecs = [vec for vec in V[j][c] if isinstance(vec, np.ndarray) and vec.shape == (L,)] if not valid_vecs: V[j][c] = np.zeros((L, 0)) continue try: V[j][c] = np.array(valid_vecs).T if V[j][c].shape[0] != L: V[j][c] = np.zeros((L, 0)) else: valid_laws_exist = True except Exception: V[j][c] = np.zeros((L, 0)) else: V[j][c] = np.zeros((L, 0)) if not valid_laws_exist: logging.error("LLT ERROR: No valid laws computed.") return np.array([]), np.array([]) def transform_instance(X_instance): transformed_features = [] if X_instance.ndim != 2 or X_instance.shape[0] != K or X_instance.shape[1] != num_features: return np.zeros(num_features * 2 * D) for j in range(num_features): feature_data = X_instance[:, j] A = create_embedding(feature_data, l=L, lag=LAG) if A.shape[0] < L: transformed_features.extend([0.0] * (2 * D)) continue if np.isnan(A).any() or np.isinf(A).any(): transformed_features.extend([0.0] * (2 * D)) continue try: S = A.T @ A if np.isnan(S).any() or np.isinf(S).any(): transformed_features.extend([0.0] * (2 * D)) continue for c in ['0', '1']: if V[j][c].shape[1] == 0: transformed_features.extend([0.0] * D) continue S_V = S @ V[j][c] if S_V.size == 0 or 
np.isnan(S_V).any() or np.isinf(S_V).any(): transformed_features.extend([0.0] * D) continue variances = np.var(S_V, axis=0) if variances.size == 0: transformed_features.extend([0.0] * D) continue variances = np.nan_to_num(variances, nan=np.finfo(variances.dtype).max, posinf=np.finfo(variances.dtype).max, neginf=np.finfo(variances.dtype).max) num_vars_available = variances.size num_vars_to_select = min(D, num_vars_available) smallest_indices = np.argpartition(variances, num_vars_to_select -1)[:num_vars_to_select] smallest_vars = np.sort(variances[smallest_indices]) padded_vars = np.pad(smallest_vars, (0, D - num_vars_to_select), 'constant', constant_values=0.0) if np.isnan(padded_vars).any() or np.isinf(padded_vars).any(): padded_vars = np.nan_to_num(padded_vars, nan=0.0, posinf=0.0, neginf=0.0) transformed_features.extend(padded_vars) except Exception: current_len = len(transformed_features) expected_len_after_feature = (j + 1) * 2 * D num_missing = expected_len_after_feature - current_len if num_missing > 0: transformed_features.extend([0.0] * num_missing) transformed_features = transformed_features[:expected_len_after_feature] correct_len = num_features * 2 * D if len(transformed_features) != correct_len: if len(transformed_features) < correct_len: transformed_features.extend([0.0] * (correct_len - len(transformed_features))) else: transformed_features = transformed_features[:correct_len] return np.array(transformed_features) X_train_t = np.array([transform_instance(X) for X in X_train]) X_test_t = np.array([]) if X_test.size > 0: X_test_t = np.array([transform_instance(X) for X in X_test]) expected_dim = num_features * 2 * D if X_train_t.shape[0] != len(X_train) or (X_train_t.size > 0 and X_train_t.shape[1] != expected_dim): logging.error(f"LLT Train transform resulted in unexpected shape.") return np.array([]), np.array([]) if X_test.size > 0 and (X_test_t.shape[0] != len(X_test) or (X_test_t.size > 0 and X_test_t.shape[1] != expected_dim)): logging.error(f"LLT Test transform resulted in unexpected shape.") return X_train_t, np.array([]) return X_train_t, X_test_t except Exception as e: logging.exception("Error in llt_transform function") return np.array([]), np.array([]) def normalize_data(df): # (Keep this function as is) normalized_df = df.copy() if not isinstance(df, pd.DataFrame): logging.error("Normalize_data received non-DataFrame input.") return None for feature in FEATURES: if feature == 'timestamp': continue if feature not in df.columns: normalized_df[feature] = 0.0 continue if pd.api.types.is_numeric_dtype(df[feature]): mean = df[feature].mean() std = df[feature].std() if std is not None and not pd.isna(std) and std > 1e-9: normalized_df[feature] = (df[feature] - mean) / std else: normalized_df[feature] = 0.0 if normalized_df[feature].isnull().any(): normalized_df[feature] = normalized_df[feature].fillna(0.0) else: normalized_df[feature] = 0.0 return normalized_df def generate_synthetic_data(symbol, total_candles=WINDOW): # (Keep this function as is) logging.info(f"Generating synthetic data for {symbol} ({total_candles} candles)") np.random.seed(int(time.time() * 1000) % (2**32 - 1)) end_time = pd.Timestamp.now(tz='UTC') timestamps = pd.date_range(end=end_time, periods=total_candles, freq='T') volatility = np.random.uniform(0.005, 0.03) base_price = np.random.uniform(1, 5000) prices = [base_price] for _ in range(1, total_candles): change = np.random.normal(0, volatility / np.sqrt(1440)) prices.append(prices[-1] * (1 + change)) prices = np.maximum(0.01, prices) close_prices = 
np.array(prices) open_prices = close_prices * (1 + np.random.normal(0, volatility / np.sqrt(1440) / 2, total_candles)) high_prices = np.maximum(close_prices, open_prices) * (1 + np.random.uniform(0, volatility / np.sqrt(1440), total_candles)) low_prices = np.minimum(close_prices, open_prices) * (1 - np.random.uniform(0, volatility / np.sqrt(1440), total_candles)) high_prices = np.maximum.reduce([high_prices, open_prices, close_prices]) low_prices = np.minimum.reduce([low_prices, open_prices, close_prices]) volumes = np.random.poisson(base_price * np.random.uniform(1, 10)) * (1 + np.abs(np.diff(close_prices, prepend=close_prices[0])) / close_prices * 5) volumes = np.maximum(1, volumes) df = pd.DataFrame({ 'timestamp': timestamps, 'open': open_prices, 'high': high_prices, 'low': low_prices, 'close': close_prices, 'volume': volumes }) for col in FEATURES: df[col] = pd.to_numeric(df[col]) df.reset_index(drop=True, inplace=True) return df def prepare_training_data(symbol, total_candles_to_fetch=WINDOW + OVERLAP_STEP * 20): # (Keep this function as is) logging.info(f"Preparing training data for {symbol}...") try: required_base_candles = WINDOW estimated_candles_needed = required_base_candles + (MIN_TRAINING_EXAMPLES * 2) * OVERLAP_STEP + 500 fetch_candle_count = max(WINDOW + 500, estimated_candles_needed) logging.info(f"Fetching {fetch_candle_count} candles for {symbol} training prep...") df = fetch_historical_data(symbol, timeframe='1m', total_candles=fetch_candle_count) if df is None or len(df) < WINDOW: logging.error(f"Insufficient data fetched for {symbol} ({len(df) if df is not None else 0} < {WINDOW}).") if USE_SYNTHETIC_DATA_FOR_LOW_VOLUME: logging.warning(f"Attempting synthetic data generation for {symbol}.") df = generate_synthetic_data(symbol, total_candles=WINDOW + OVERLAP_STEP * 10) if df is None or len(df) < WINDOW: logging.error(f"Synthetic data generation failed or insufficient for {symbol}.") return None, None else: logging.info(f"Using synthetic data ({len(df)} points) for {symbol}.") else: return None, None df_normalized = normalize_data(df) if df_normalized is None: logging.error(f"Normalization failed for {symbol}.") return None, None if df_normalized[FEATURES].isnull().any().any(): logging.warning(f"NaN values found after normalization for {symbol}. 
Filling with 0.") df_normalized = df_normalized.fillna(0.0) X, y = [], [] end_index = len(df) start_index = WINDOW num_windows_created = 0 for i in range(end_index, start_index - 1, -OVERLAP_STEP): window_end_idx = i window_start_idx = i - WINDOW if window_start_idx < 0: continue window_orig = df.iloc[window_start_idx:window_end_idx] window_norm = df_normalized.iloc[window_start_idx:window_end_idx] if len(window_orig) != WINDOW or len(window_norm) != WINDOW: continue input_data_norm = window_norm.iloc[:K][FEATURES].values if input_data_norm.shape[0] != K or input_data_norm.shape[1] != len(FEATURES): continue if np.isnan(input_data_norm).any(): continue start_price_iloc_idx = K - 1 end_price_iloc_idx = WINDOW - 1 start_price = window_orig['close'].iloc[start_price_iloc_idx] end_price = window_orig['close'].iloc[end_price_iloc_idx] if pd.isna(start_price) or pd.isna(end_price) or start_price <= 0: continue X.append(input_data_norm) y.append(1 if end_price > start_price else 0) num_windows_created += 1 if not X: logging.error(f"No valid windows created for {symbol}.") return None, None X = np.array(X) y = np.array(y) unique_classes, class_counts = np.unique(y, return_counts=True) class_dist_str = ", ".join([f"Class {cls}: {count}" for cls, count in zip(unique_classes, class_counts)]) logging.info(f"Class distribution BEFORE balancing for {symbol}: {class_dist_str}") if len(unique_classes) < 2: logging.error(f"ONLY ONE CLASS ({unique_classes[0]}) present for {symbol}.") return None, None min_class_count = min(class_counts) if min_class_count * 2 < MIN_TRAINING_EXAMPLES: logging.error(f"Minority class count ({min_class_count}) too low for {symbol}.") return None, None samples_per_class = min_class_count balanced_indices = [] for class_val in unique_classes: class_indices = np.where(y == class_val)[0] num_to_choose = min(samples_per_class, len(class_indices)) chosen_indices = np.random.choice(class_indices, size=num_to_choose, replace=False) balanced_indices.extend(chosen_indices) np.random.shuffle(balanced_indices) X_balanced = X[balanced_indices] y_balanced = y[balanced_indices] final_unique, final_counts = np.unique(y_balanced, return_counts=True) logging.info(f"Balanced dataset for {symbol}: {len(X_balanced)} instances. Final counts: {dict(zip(final_unique, final_counts))}") if len(X_balanced) < MIN_TRAINING_EXAMPLES: logging.error(f"Insufficient data ({len(X_balanced)}) for {symbol} AFTER balancing.") return None, None if X_balanced.ndim != 3 or X_balanced.shape[0] == 0 or X_balanced.shape[1] != K or X_balanced.shape[2] != len(FEATURES): logging.error(f"Final balanced data has unexpected shape {X_balanced.shape} for {symbol}.") return None, None return X_balanced, y_balanced except Exception as e: logging.exception(f"Error preparing training data for {symbol}") return None, None def train_model(symbol): # (Keep this function as is) logging.info(f"--- Attempting to train model for {symbol} ---") np.random.seed(int(time.time()) % (2**32 - 1)) X, y = prepare_training_data(symbol) if X is None or y is None: logging.error(f"Failed to prepare training data for {symbol}. Training aborted.") return None, None, None try: accuracy = -1.0 if len(X) < MIN_TRAINING_EXAMPLES + 2: logging.warning(f"Dataset for {symbol} too small ({len(X)}). 
Training on all data.") X_train, y_train = X, y X_val, y_val = np.array([]), np.array([]) else: indices = np.random.permutation(len(X)) val_size = max(1, int(len(X) * 0.2)) split_idx = len(X) - val_size train_indices, val_indices = indices[:split_idx], indices[split_idx:] if len(train_indices) == 0 or len(val_indices) == 0: logging.error(f"Train/Val split resulted in zero samples. Training on all data.") X_train, y_train = X, y X_val, y_val = np.array([]), np.array([]) else: X_train, X_val = X[train_indices], X[val_indices] y_train, y_val = y[train_indices], y[val_indices] if len(np.unique(y_train)) < 2: logging.error(f"Only one class in TRAINING set after split for {symbol}. Aborting.") return None, None, None if len(np.unique(y_val)) < 2: logging.warning(f"Only one class in VALIDATION set after split for {symbol}.") if X_val.size == 0: X_val_shaped = np.empty((0, K, len(FEATURES))) else: X_val_shaped = X_val X_train_t, X_val_t = llt_transform(X_train, y_train, X_val_shaped) if X_train_t.size == 0: logging.error(f"LLT training transformation failed for {symbol}. Training aborted.") return None, None, None if X_val.size > 0 and X_val_t.size == 0: logging.warning(f"LLT validation transformation failed for {symbol}.") accuracy = -1.0 if np.isnan(X_train_t).any() or np.isinf(X_train_t).any(): logging.error(f"NaN/Inf in LLT transformed TRAINING data for {symbol}. Training aborted.") return None, None, None if X_val_t.size > 0 and (np.isnan(X_val_t).any() or np.isinf(X_val_t).any()): logging.warning(f"NaN/Inf in LLT transformed VALIDATION data for {symbol}.") accuracy = -1.0 n_neighbors = min(5, len(y_train) - 1) if len(y_train) > 1 else 1 n_neighbors = max(1, n_neighbors) if n_neighbors > 1 and n_neighbors % 2 == 0: n_neighbors -= 1 model = KNeighborsClassifier(n_neighbors=n_neighbors, weights='distance') model.fit(X_train_t, y_train) if accuracy != -1.0 and X_val_t.size > 0: try: accuracy = model.score(X_val_t, y_val) logging.info(f"Model for {symbol} trained. Validation Accuracy: {accuracy:.3f}") except Exception as eval_e: logging.exception(f"Error during KNN validation scoring for {symbol}: {eval_e}") accuracy = -1.0 elif accuracy == -1.0: logging.info(f"Model for {symbol} trained. Validation skipped or failed.") else: logging.info(f"Model for {symbol} trained. 
No validation data.") accuracy = -1.0 return model, X_train, y_train except Exception as e: logging.exception(f"Error during model training pipeline for {symbol}") return None, None, None def predict_real_time(symbol, model_data): # (Keep this function as is) if model_data is None: return "Model N/A", 0.0 model, X_train_orig_for_llt, y_train_orig_for_llt = model_data if model is None or X_train_orig_for_llt is None or y_train_orig_for_llt is None: logging.error(f"Invalid model data tuple for prediction on {symbol}") return "Model Error", 0.0 if X_train_orig_for_llt.size == 0 or y_train_orig_for_llt.size == 0: logging.error(f"Training data for LLT laws is empty for {symbol}") return "LLT Data Error", 0.0 try: df = fetch_historical_data(symbol, timeframe='1m', total_candles=K + 60) if df is None or len(df) < K: return "Data Error", 0.0 df_recent = df.iloc[-K:] if len(df_recent) != K: return "Data Error", 0.0 df_recent_normalized = normalize_data(df_recent) if df_recent_normalized is None: return "Norm Error", 0.0 if df_recent_normalized[FEATURES].isnull().any().any(): df_recent_normalized = df_recent_normalized.fillna(0.0) X_predict_input = np.array([df_recent_normalized[FEATURES].values]) _, X_predict_transformed = llt_transform(X_train_orig_for_llt, y_train_orig_for_llt, X_predict_input) if X_predict_transformed.size == 0 or X_predict_transformed.shape[0] != 1: return "Transform Error", 0.0 if np.isnan(X_predict_transformed).any() or np.isinf(X_predict_transformed).any(): X_predict_transformed = np.nan_to_num(X_predict_transformed, nan=0.0, posinf=0.0, neginf=0.0) try: probabilities = model.predict_proba(X_predict_transformed) if probabilities.shape[0] != 1 or probabilities.shape[1] != 2: return "Predict Error", 0.0 prob_class_1 = probabilities[0, 1] prediction_label = "Rise" if prob_class_1 >= 0.5 else "Fall" confidence = prob_class_1 if prediction_label == "Rise" else probabilities[0, 0] return prediction_label, confidence except Exception as knn_e: logging.exception(f"Error during KNN prediction probability for {symbol}") return "Predict Error", 0.0 except Exception as e: logging.exception(f"Error in predict_real_time for {symbol}") return "Error", 0.0 # --- TA Calculation Function (Using TA-Lib) --- def calculate_ta_indicators(df_ta): """ Calculates TA indicators (RSI, MACD, VWAP, ATR) using TA-Lib. Requires df_ta to have 'open', 'high', 'low', 'close', 'volume' columns. """ indicators = {'RSI': np.nan, 'MACD': np.nan, 'MACD_Signal': np.nan, 'MACD_Hist': np.nan, 'VWAP': np.nan, 'ATR': np.nan} required_cols = ['open', 'high', 'low', 'close', 'volume'] min_len_needed = max(RSI_PERIOD, MACD_SLOW, ATR_PERIOD) + 1 # TA-Lib often needs P+1 if df_ta is None or len(df_ta) < min_len_needed: logging.warning(f"Insufficient data ({len(df_ta) if df_ta is not None else 0} < {min_len_needed}) for TA-Lib calculations.") return indicators # Ensure columns exist if not all(col in df_ta.columns for col in required_cols): logging.error(f"Missing required columns for TA-Lib: Have {df_ta.columns}, Need {required_cols}") return indicators # --- Prepare data for TA-Lib (NumPy arrays, handle NaNs) --- df_ta = df_ta[required_cols].copy() # Work on a copy with only needed columns # Check for NaNs BEFORE converting to numpy, TA-Lib generally dislikes them if df_ta.isnull().values.any(): nan_count = df_ta.isnull().sum().sum() logging.warning(f"Found {nan_count} NaN(s) in TA input data. 
Applying ffill()...") df_ta.ffill(inplace=True) # Forward fill NaNs # Check again after ffill - if NaNs remain (e.g., at the start), need more robust handling if df_ta.isnull().values.any(): logging.error(f"NaNs still present after ffill. Cannot proceed with TA-Lib.") return indicators # Return NaNs try: # Convert to NumPy arrays of type float open_p = df_ta['open'].values.astype(float) high_p = df_ta['high'].values.astype(float) low_p = df_ta['low'].values.astype(float) close_p = df_ta['close'].values.astype(float) volume_p = df_ta['volume'].values.astype(float) # --- Calculate Indicators using TA-Lib --- # RSI rsi_values = talib.RSI(close_p, timeperiod=RSI_PERIOD) indicators['RSI'] = rsi_values[-1] if len(rsi_values) > 0 else np.nan # MACD macd_line, signal_line, hist = talib.MACD(close_p, fastperiod=MACD_FAST, slowperiod=MACD_SLOW, signalperiod=MACD_SIGNAL) indicators['MACD'] = macd_line[-1] if len(macd_line) > 0 else np.nan indicators['MACD_Signal'] = signal_line[-1] if len(signal_line) > 0 else np.nan indicators['MACD_Hist'] = hist[-1] if len(hist) > 0 else np.nan # ATR atr_values = talib.ATR(high_p, low_p, close_p, timeperiod=ATR_PERIOD) indicators['ATR'] = atr_values[-1] if len(atr_values) > 0 else np.nan # VWAP (Manual Calculation - TA-Lib doesn't have it built-in) typical_price = (high_p + low_p + close_p) / 3.0 tp_vol = typical_price * volume_p cumulative_volume = np.cumsum(volume_p) # Avoid division by zero if volume is zero for initial periods if cumulative_volume[-1] > 1e-12: # Check if there's significant volume vwap_values = np.cumsum(tp_vol) / np.maximum(cumulative_volume, 1e-12) # Avoid div by zero strictly indicators['VWAP'] = vwap_values[-1] else: indicators['VWAP'] = np.nan # VWAP undefined if no volume # Final check for NaNs in results (TA-Lib might return NaN for initial periods) for key, value in indicators.items(): if pd.isna(value): indicators[key] = np.nan # Ensure consistent NaN representation # logging.debug(f"TA-Lib Indicators calculated: {indicators}") return indicators except Exception as ta_e: logging.exception(f"Error calculating TA indicators using TA-Lib: {ta_e}") return {k: np.nan for k in indicators} # Return NaNs on error # --- Trade Level Calculation (Unchanged) --- def calculate_trade_levels(prediction, confidence, current_price, atr): # (Keep this function as is - no changes needed) levels = {'Entry': np.nan, 'TP1': np.nan, 'TP2': np.nan, 'SL': np.nan} if pd.isna(current_price) or current_price <= 0 or pd.isna(atr) or atr <= 0: return levels if prediction == "Rise" and confidence >= CONFIDENCE_THRESHOLD: entry_price = current_price levels['Entry'] = entry_price levels['TP1'] = entry_price + TP1_ATR_MULTIPLIER * atr levels['TP2'] = entry_price + TP2_ATR_MULTIPLIER * atr levels['SL'] = entry_price - SL_ATR_MULTIPLIER * atr levels['SL'] = max(0.01, levels['SL']) # Add Fall logic here if needed return levels # --- Concurrency Wrappers (Unchanged) --- def train_model_task(coin): # (Keep this function as is) try: result = train_model(coin) if result != (None, None, None): model, X_train_orig, y_train_orig = result return coin, (model, X_train_orig, y_train_orig) else: return coin, None except Exception as e: logging.exception(f"Unhandled exception in train_model_task for {coin}") return coin, None def train_all_models(coin_list=None, num_workers=NUM_WORKERS_TRAINING): # (Keep this function as is) global trained_models start_time = time.time() if coin_list is None or not coin_list: logging.info("No coin list provided, fetching top coins by volume...") try: 
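        # Worked VWAP example for calculate_ta_indicators above (made-up candles, two bars):
        #   bar 1: H=12, L=8, C=10, V=100 -> typical price 10
        #   bar 2: H=16, L=12, C=14, V=300 -> typical price 14
        #   VWAP after bar 2 = (10*100 + 14*300) / (100 + 300) = 5200 / 400 = 13.0
        # TA-Lib has no built-in VWAP, which is why it is accumulated manually there.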
coin_list = get_all_usdt_pairs() if not coin_list: msg = "Failed to fetch coin list even with fallback. Training aborted." logging.error(msg) return msg except Exception as e: msg = f"Error fetching coin list: {e}. Training aborted." logging.exception(msg) return msg logging.info(f"Starting training for {len(coin_list)} coins using {num_workers} workers...") results_log = [] successful_trains = 0 failed_trains = 0 new_models = {} with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers, thread_name_prefix='TrainWorker') as executor: future_to_coin = {executor.submit(train_model_task, coin): coin for coin in coin_list} processed_count = 0 total_coins = len(coin_list) for future in concurrent.futures.as_completed(future_to_coin): processed_count += 1 coin = future_to_coin[future] try: returned_coin, model_data = future.result() if returned_coin == coin and model_data is not None: new_models[returned_coin] = model_data results_log.append(f"✅ {returned_coin}: Model trained successfully.") successful_trains += 1 else: results_log.append(f"❌ {coin}: Model training failed (check logs).") failed_trains += 1 except Exception as e: results_log.append(f"❌ {coin}: Training task generated exception: {e}") failed_trains += 1 logging.exception(f"Exception from training future for {coin}") if processed_count % 10 == 0 or processed_count == total_coins: logging.info(f"Training progress: {processed_count}/{total_coins} coins processed.") logging.getLogger().handlers[0].flush() trained_models.update(new_models) logging.info(f"Updated global models dictionary. Total models now: {len(trained_models)}") end_time = time.time() duration = end_time - start_time completion_message = ( f"Training run completed in {duration:.2f} seconds.\n" f"Successfully trained: {successful_trains}\n" f"Failed to train: {failed_trains}\n" f"Total models available now: {len(trained_models)}" ) logging.info(completion_message) return completion_message + "\n\n" + "\n".join(results_log[-20:]) # --- Update Predictions Table (Mostly Unchanged, uses new TA function) --- def update_predictions_table(): # (This function structure remains the same, it just calls the new calculate_ta_indicators) global last_update_time logging.info("--- Updating Predictions Table ---") start_time = time.time() predictions_data = {} current_models = trained_models.copy() if not current_models: msg = "No models available. Please train first." 
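        # Overview of the stages this function runs (summary of the code below, no new behaviour):
        #   Stage 1: run predict_real_time concurrently for every trained model.
        #   Stage 2: fetch current tickers plus TA_DATA_POINTS (200) one-minute candles per symbol.
        #   Stage 3: compute RSI / MACD / VWAP / ATR with TA-Lib and derive ATR-based trade levels.
        #   Stage 4: sort (actionable Rise >= threshold, other Rise, Fall, errors) and format the
        #            top MAX_COINS_TO_DISPLAY rows for the Gradio table.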
logging.warning(msg) cols = ['Rank', 'Coin', 'Prediction', 'Confidence', 'Price', 'Volume (Quote)', 'Entry', 'Entry Time', 'Exit Time', 'TP1', 'TP2', 'SL', 'RSI', 'MACD Hist', 'VWAP', 'ATR'] return pd.DataFrame([], columns=cols), msg symbols_with_models = list(current_models.keys()) logging.info(f"Step 1: Generating predictions for {len(symbols_with_models)} models...") # --- Stage 1: Get Predictions Concurrently --- with concurrent.futures.ThreadPoolExecutor(max_workers=NUM_WORKERS_PREDICTION, thread_name_prefix='PredictWorker') as executor: future_to_coin_pred = {executor.submit(predict_real_time, coin, model_data): coin for coin, model_data in current_models.items()} pred_success = 0 pred_fail = 0 for future in concurrent.futures.as_completed(future_to_coin_pred): coin = future_to_coin_pred[future] try: pred, conf = future.result() if pred not in ["Model N/A", "Model Error", "Data Error", "Norm Error", "LLT Data Error", "Transform Error", "Predict Error", "Error"]: predictions_data[coin] = {'prediction': pred, 'confidence': float(conf)} pred_success += 1 else: predictions_data[coin] = {'prediction': pred, 'confidence': 0.0} pred_fail += 1 except Exception as e: logging.exception(f"Error getting prediction result for {coin}") predictions_data[coin] = {'prediction': "Future Error", 'confidence': 0.0} pred_fail +=1 logging.info(f"Step 1 Complete: Predictions generated ({pred_success} success, {pred_fail} fail).") # --- Stage 2: Fetch Current Tickers & TA Data Concurrently --- symbols_to_fetch_data = list(predictions_data.keys()) if not symbols_to_fetch_data: logging.warning("No symbols with predictions to fetch data for.") cols = ['Rank', 'Coin', 'Prediction', 'Confidence', 'Price', 'Volume (Quote)', 'Entry', 'Entry Time', 'Exit Time', 'TP1', 'TP2', 'SL', 'RSI', 'MACD Hist', 'VWAP', 'ATR'] return pd.DataFrame([], columns=cols), "No symbols processed." 
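    # Worked example for the ATR-based levels computed in Stage 3 below (illustrative numbers
    # only): with current price 100 and ATR(14) = 2, a Rise signal at or above
    # CONFIDENCE_THRESHOLD gets Entry = 100, TP1 = 100 + 1.5*2 = 103, TP2 = 100 + 3*2 = 106,
    # SL = max(0.01, 100 - 1*2) = 98. Signals below the threshold get no levels at all.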
logging.info(f"Step 2: Fetching Tickers and {TA_DATA_POINTS} OHLCV candles for {len(symbols_to_fetch_data)} symbols...") tickers_data = {} ohlcv_data = {} try: # Fetch Tickers batch_size_tickers = 100 fetched_tickers_batch = {} for i in range(0, len(symbols_to_fetch_data), batch_size_tickers): batch_symbols = symbols_to_fetch_data[i:i+batch_size_tickers] try: batch_tickers = exchange.fetch_tickers(symbols=batch_symbols) fetched_tickers_batch.update(batch_tickers) time.sleep(exchange.rateLimit / 1000 * 0.5) except Exception as e: logging.error(f"Failed to fetch ticker batch starting with {batch_symbols[0]}: {e}") tickers_data = fetched_tickers_batch logging.info(f"Fetched {len(tickers_data)} tickers.") except Exception as e: logging.exception(f"Error fetching tickers in prediction update: {e}") # Fetch OHLCV for TA with concurrent.futures.ThreadPoolExecutor(max_workers=NUM_WORKERS_PREDICTION, thread_name_prefix='TADataWorker') as executor: future_to_coin_ohlcv = {executor.submit(fetch_historical_data, coin, '1m', TA_DATA_POINTS): coin for coin in symbols_to_fetch_data} for future in concurrent.futures.as_completed(future_to_coin_ohlcv): coin = future_to_coin_ohlcv[future] try: df_ta = future.result() if df_ta is not None and len(df_ta) == TA_DATA_POINTS: # Ensure standard column names expected by calculate_ta_indicators df_ta.columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume'] ohlcv_data[coin] = df_ta except Exception as e: logging.exception(f"Error fetching TA OHLCV data for {coin}") logging.info(f"Step 2 Complete: Fetched TA data for {len(ohlcv_data)} symbols.") # --- Stage 3: Calculate TA & Trade Levels --- logging.info(f"Step 3: Calculating TA (using TA-Lib) and Trade Levels...") final_results = [] processing_time = datetime.now(timezone.utc) for symbol in symbols_to_fetch_data: pred_info = predictions_data.get(symbol, {'prediction': 'Missing Pred', 'confidence': 0.0}) ticker = tickers_data.get(symbol) df_ta = ohlcv_data.get(symbol) # This df should have standard columns now current_price, quote_volume = np.nan, np.nan ta_indicators = {k: np.nan for k in ['RSI', 'MACD', 'MACD_Signal', 'MACD_Hist', 'VWAP', 'ATR']} trade_levels = {k: np.nan for k in ['Entry', 'TP1', 'TP2', 'SL']} entry_time, exit_time = pd.NaT, pd.NaT if ticker and isinstance(ticker, dict): current_price = ticker.get('last', np.nan) quote_volume = ticker.get('info', {}).get('quoteVolume') if quote_volume is None: base_volume = ticker.get('baseVolume') if base_volume is not None and current_price is not None: try: quote_volume = float(base_volume) * float(current_price) except (ValueError, TypeError): quote_volume = np.nan try: current_price = float(current_price) if current_price is not None else np.nan except (ValueError, TypeError): current_price = np.nan try: quote_volume = float(quote_volume) if quote_volume is not None else np.nan except (ValueError, TypeError): quote_volume = np.nan # Calculate TA using the new function if df_ta is not None: ta_indicators = calculate_ta_indicators(df_ta) # Calls the TA-Lib version if pred_info['prediction'] in ["Rise", "Fall"] and not pd.isna(current_price) and not pd.isna(ta_indicators['ATR']): trade_levels = calculate_trade_levels(pred_info['prediction'], pred_info['confidence'], current_price, ta_indicators['ATR']) if not pd.isna(trade_levels['Entry']): entry_time = processing_time exit_time = processing_time + timedelta(hours=PREDICTION_WINDOW_HOURS) final_results.append({ 'coin': symbol.split('/')[0], 'full_symbol': symbol, 'prediction': pred_info['prediction'], 
'confidence': pred_info['confidence'], 'price': current_price, 'volume': quote_volume, 'entry': trade_levels['Entry'], 'entry_time': entry_time, 'exit_time': exit_time, 'tp1': trade_levels['TP1'], 'tp2': trade_levels['TP2'], 'sl': trade_levels['SL'], 'rsi': ta_indicators['RSI'], 'macd_hist': ta_indicators['MACD_Hist'], 'vwap': ta_indicators['VWAP'], 'atr': ta_indicators['ATR'] }) logging.info("Step 3 Complete: TA and Trade Levels calculated.") # --- Stage 4: Sort and Format (Unchanged) --- def sort_key(item): pred, conf = item['prediction'], item['confidence'] if pred == "Rise" and conf >= CONFIDENCE_THRESHOLD and not pd.isna(item['entry']): return (0, -conf) elif pred == "Rise": return (1, -conf) elif pred == "Fall": return (2, -conf) else: return (3, 0) final_results.sort(key=sort_key) formatted_output = [] for i, p in enumerate(final_results[:MAX_COINS_TO_DISPLAY]): formatted_output.append([ i + 1, p['coin'], p['prediction'], f"{p['confidence']:.3f}", f"{p['price']:.4f}" if not pd.isna(p['price']) else "N/A", f"{p['volume']:,.0f}" if not pd.isna(p['volume']) else "N/A", f"{p['entry']:.4f}" if not pd.isna(p['entry']) else "N/A", format_datetime(p['entry_time'], "N/A"), format_datetime(p['exit_time'], "N/A"), f"{p['tp1']:.4f}" if not pd.isna(p['tp1']) else "N/A", f"{p['tp2']:.4f}" if not pd.isna(p['tp2']) else "N/A", f"{p['sl']:.4f}" if not pd.isna(p['sl']) else "N/A", f"{p['rsi']:.2f}" if not pd.isna(p['rsi']) else "N/A", f"{p['macd_hist']:.4f}" if not pd.isna(p['macd_hist']) else "N/A", f"{p['vwap']:.4f}" if not pd.isna(p['vwap']) else "N/A", f"{p['atr']:.4f}" if not pd.isna(p['atr']) else "N/A", ]) output_columns = [ 'Rank', 'Coin', 'Prediction', 'Confidence', 'Price', 'Volume (Quote)', 'Entry', 'Entry Time', 'Exit Time', 'TP1', 'TP2', 'SL', 'RSI', 'MACD Hist', 'VWAP', 'ATR' ] output_df = pd.DataFrame(formatted_output, columns=output_columns) end_time = time.time() duration = end_time - start_time last_update_time = processing_time status_message = f"Predictions updated ({len(final_results)} symbols processed) in {duration:.2f}s. Last update: {format_datetime(last_update_time)}" logging.info(status_message) return output_df, status_message # --- Gradio UI Handlers (Unchanged) --- def handle_train_click(coin_input, num_workers): # (Keep this function as is) logging.info(f"Train button clicked. Workers: {num_workers}") coins = None num_workers = int(num_workers) if coin_input and coin_input.strip(): raw_coins = coin_input.replace(',', ' ').split() coins = [] valid = True for c in raw_coins: coin_upper = c.strip().upper() if '/' not in coin_upper: coin_upper += '/USDT' if coin_upper.endswith('/USDT'): coins.append(coin_upper) else: valid = False logging.error(f"Invalid coin format: {c}. Must be SYMBOL or SYMBOL/USDT.") break if not valid: return "Error: Custom coins must be valid SYMBOL or SYMBOL/USDT pairs." 
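        # Input normalization above (sketch): "btc, eth/usdt" becomes ["BTC/USDT", "ETH/USDT"];
        # anything that still does not end in /USDT after upper-casing is rejected outright.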
logging.info(f"Training requested for custom coin list: {coins}") else: logging.info("Training requested for top coins by volume.") train_status = train_all_models(coin_list=coins, num_workers=num_workers) return f"--- Training Run ---:\n{train_status}\n\n---> Press 'Refresh Predictions' <---" def handle_refresh_click(): # (Keep this function as is) logging.info("Refresh button clicked.") try: df, status = update_predictions_table() return df, status except Exception as e: logging.exception("Error during handle_refresh_click") cols = ['Rank', 'Coin', 'Prediction', 'Confidence', 'Price', 'Volume (Quote)', 'Entry', 'Entry Time', 'Exit Time', 'TP1', 'TP2', 'SL', 'RSI', 'MACD Hist', 'VWAP', 'ATR'] return pd.DataFrame([], columns=cols), f"Error updating predictions: {e}" # --- Gradio Interface Definition (Unchanged) --- with gr.Blocks(theme=gr.themes.Soft()) as demo: gr.Markdown("# Cryptocurrency Prediction & TA Signal Explorer (LLT-KNN + TA-Lib)") # Updated title slightly gr.Markdown(f""" Predicts **{PREDICTION_WINDOW_HOURS}-hour** price direction (Rise/Fall) using LLT-KNN. Displays current price, volume, TA indicators (RSI, MACD, VWAP, ATR calculated using **TA-Lib**), and potential trade levels for **Rise** signals meeting confidence >= **{CONFIDENCE_THRESHOLD}**. TP/SL levels based on **{TP1_ATR_MULTIPLIER}x / {TP2_ATR_MULTIPLIER}x / {SL_ATR_MULTIPLIER}x ATR({ATR_PERIOD})**. **Warning:** Educational. High risk. Not financial advice. Ensure TA-Lib is correctly installed. """) with gr.Row(): with gr.Column(scale=4): prediction_df = gr.Dataframe( headers=[ 'Rank', 'Coin', 'Prediction', 'Confidence', 'Price', 'Volume (Quote)', 'Entry', 'Entry Time', 'Exit Time', 'TP1', 'TP2', 'SL', 'RSI', 'MACD Hist', 'VWAP', 'ATR' ], datatype=[ 'number', 'str', 'str', 'str', 'str', 'str', 'str', 'str', 'str', 'str', 'str', 'str', 'str', 'str', 'str', 'str' ], row_count=15, col_count=(16, "fixed"), label="Predictions & TA Signals", wrap=True, ) with gr.Column(scale=1): with gr.Accordion("Train Models", open=True): coin_input = gr.Textbox(label="Train Specific Coins (e.g., BTC, ETH/USDT)", placeholder="Leave empty for top coins by volume") max_workers_slider = gr.Slider(minimum=1, maximum=10, value=NUM_WORKERS_TRAINING, step=1, label="Parallel Training Workers") train_button = gr.Button("Start Training", variant="primary") refresh_button = gr.Button("Refresh Predictions", variant="secondary") status_text = gr.Textbox(label="Status Log", lines=15, interactive=False, max_lines=30) gr.Markdown( """ ## Notes - **TA-Lib**: This version uses the TA-Lib library for indicators. Ensure it's installed correctly (can be tricky). - **Data**: Fetches OHLCV data (Bitget, 1-min). Uses cache. Handles rate limits. - **Training**: Uses past ~14h data (12h train, 2h predict). Normalizes, balances classes, applies LLT, trains KNN. - **Prediction**: Uses latest 12h data for KNN input. - **Trade Levels**: Only shown for 'Rise' predictions above confidence threshold. Based on current price and ATR volatility. **Highly speculative.** - **Sorting**: Table sorted by (Potential Rise Signals > Other Rise > Fall > Errors), then by confidence descending. - **Refresh**: Fetches latest prices/TA and re-evaluates signals. 
""" ) train_button.click(fn=handle_train_click, inputs=[coin_input, max_workers_slider], outputs=status_text) refresh_button.click(fn=handle_refresh_click, inputs=None, outputs=[prediction_df, status_text]) # --- Startup Initialization (Unchanged) --- def initialize_models_on_startup(): # (Keep this function as is) logging.info("----- Initializing Models (Startup Thread) -----") default_coins = ['BTC/USDT', 'ETH/USDT', 'SOL/USDT', 'XRP/USDT', 'DOGE/USDT'] try: initial_status = train_all_models(default_coins, num_workers=2) logging.info("----- Initial Model Training Complete -----") logging.info(initial_status) except Exception as e: logging.exception("Error during startup initialization.") # --- Main Execution (Unchanged) --- if __name__ == "__main__": logging.info("Starting application...") # Check if TA-Lib import worked (basic check) try: # Try accessing a TA-Lib function _ = talib.RSI(np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])) logging.info("TA-Lib library seems accessible.") except NameError: logging.error("FATAL: TA-Lib library not found or import failed. Please install it correctly.") sys.exit(1) except Exception as ta_init_e: logging.error(f"FATAL: Error testing TA-Lib library: {ta_init_e}. Please check installation.") sys.exit(1) init_thread = threading.Thread(target=initialize_models_on_startup, name="StartupTrainThread", daemon=True) init_thread.start() logging.info("Launching Gradio Interface...") try: demo.launch(server_name="0.0.0.0") except Exception as e: logging.exception("Failed to launch Gradio interface.") finally: logging.info("Gradio Interface stopped.")