Source code for resonance.api.validation

"""DataFrame validation utilities for scan plan creation"""

import re

import pandas as pd

from .types import ValidationError
from .types import motor as valid_motors

# ============================================================================
# Exposure Column Detection
# ============================================================================

# Regular expressions that identify an exposure-time column. They are applied
# with ``re.match`` to the lower-cased, whitespace-stripped column label, so
# every pattern is written in lower case and anchored at the start.
EXPOSURE_PATTERNS: list[str] = [
    r"^exposure$",  # Exactly "exposure"
    r"^exp$",  # Exactly "exp"
    r"^count[_\s]?time",  # Starts with "count_time", "count time" or "counttime"
    r"^integration[_\s]?time",  # Starts with "integration_time" / "integration time"
    r"^unnamed.*",  # Pandas unnamed column pattern, e.g. "Unnamed: 2"
    r"^$",  # Empty string for final unnamed column
]


def find_exposure_column(df: pd.DataFrame) -> str | None:
    """
    Find exposure time column using pattern matching.

    Handles common variations:
    - "exposure", "exp"
    - "count_time", "count time"
    - "integration_time", "integration time"
    - "Unnamed: 2" (pandas default for unnamed columns)
    - "" (empty string column name)

    Patterns are tried in the order they are listed in ``EXPOSURE_PATTERNS``;
    for each pattern the columns are scanned left to right. An explicitly
    named column such as "exposure" therefore takes precedence over an
    unnamed fallback column (e.g. a leading "Unnamed: 0" produced by a saved
    index), regardless of where the columns sit in the frame.

    Parameters:
        df: Input DataFrame

    Returns:
        Column name if found, None otherwise
    """
    # Normalise every label once. Labels are stringified because DataFrame
    # column labels are not guaranteed to be ``str``.
    normalized = [(col, str(col).lower().strip()) for col in df.columns]

    # Pattern-major iteration: the first pattern that matches any column wins.
    for pattern in EXPOSURE_PATTERNS:
        for col, col_str in normalized:
            if re.match(pattern, col_str, re.IGNORECASE):
                return col
    return None
# ============================================================================
# Motor Column Validation
# ============================================================================
def validate_motor_columns(df: pd.DataFrame) -> list[str]:
    """
    Validate that DataFrame columns match known motor names.

    The column reported by ``find_exposure_column`` is skipped; every other
    column must be a member of ``valid_motors``.

    Parameters:
        df: Input DataFrame

    Returns:
        List of valid motor column names

    Raises:
        ValidationError: If invalid columns found or no motor columns present
    """
    motor_cols = []
    exposure_col = find_exposure_column(df)

    for col in df.columns:
        # Skip exposure column
        if col == exposure_col:
            continue

        # Check if column is a valid motor name
        if col not in valid_motors:
            # Column labels need not be strings (e.g. integer labels), so
            # stringify before the case-insensitive substring search; calling
            # ``.lower()`` on the raw label would raise AttributeError instead
            # of the documented ValidationError.
            needle = str(col).lower()
            similar_motors = [m for m in valid_motors if needle in m.lower()]

            # Provide helpful error message
            error_msg = f"Column '{col}' is not a valid motor name."
            if similar_motors:
                error_msg += f"\n Did you mean one of: {similar_motors[:3]}?"
            else:
                error_msg += f"\n Valid motors include: {list(valid_motors)[:5]}..."
            raise ValidationError(error_msg)

        motor_cols.append(col)

    if not motor_cols:
        raise ValidationError(
            "DataFrame must contain at least one motor column.\n"
            f"Valid motor names: {list(valid_motors)[:10]}..."
        )

    return motor_cols
def validate_scan_dataframe(df: pd.DataFrame) -> tuple[list[str], str | None]:
    """
    Validate complete scan DataFrame.

    Checks, in order: the frame is not empty, every non-exposure column is a
    valid motor name, the exposure column (if any) holds no NaN and only
    values greater than zero, and no motor column holds NaN.

    Parameters:
        df: Input DataFrame with motor columns and optional exposure column

    Returns:
        Tuple of (motor_column_names, exposure_column_name)

    Raises:
        ValidationError: If validation fails
    """
    if df.empty:
        raise ValidationError("DataFrame is empty")

    # Find exposure column
    exposure_col = find_exposure_column(df)

    # Validate motor columns
    motor_cols = validate_motor_columns(df)

    # Validate exposure values if column exists
    if exposure_col is not None:
        exposure_values = df[exposure_col]
        # Selecting a duplicated label yields a DataFrame rather than a Series.
        if not isinstance(exposure_values, pd.Series):
            raise ValidationError(
                f"Exposure column '{exposure_col}' is not a pandas Series"
            )

        # Check for NaN values in exposure
        nan_mask = exposure_values.isna()
        if nan_mask.any():
            nan_indices = df[nan_mask].index.tolist()
            raise ValidationError(
                f"NaN values found in exposure column '{exposure_col}' at rows: {nan_indices}"
            )

        # Check for invalid values. Comparing non-numeric data (e.g. strings)
        # with 0 raises TypeError; report that as a validation failure instead
        # of leaking the raw exception to the caller.
        try:
            non_positive = exposure_values <= 0
        except TypeError as err:
            raise ValidationError(
                f"Exposure column '{exposure_col}' contains non-numeric values"
            ) from err
        if non_positive.any():
            invalid_indices = df[non_positive].index.tolist()
            raise ValidationError(
                f"Invalid exposure times (must be > 0) at rows: {invalid_indices}"
            )

    # Check for NaN values in motor columns
    for col in motor_cols:
        motor_values = df[col]
        # Selecting a duplicated label yields a DataFrame rather than a Series.
        if not isinstance(motor_values, pd.Series):
            raise ValidationError(f"Motor column '{col}' is not a pandas Series")

        nan_mask = motor_values.isna()
        if nan_mask.any():
            nan_indices = df[nan_mask].index.tolist()
            raise ValidationError(
                f"NaN values found in motor column '{col}' at rows: {nan_indices}"
            )

    return motor_cols, exposure_col